├── .gitignore
├── README-zh.md
├── README.md
├── assets
    ├── algorithm.vsdx
    ├── cascade.png
    ├── loss.png
    ├── net.png
    ├── net_arch.vsdx
    ├── web.png
    ├── web_rst.png
    └── 算法流程.png
├── configs
    ├── deepsort.yml
    └── yolov3.yml
├── data
    └── tips.txt
├── deepsort
    ├── __init__.py
    ├── deep
    │   ├── __init__.py
    │   ├── false.png
    │   ├── feature_extractor.py
    │   ├── loss.png
    │   ├── model.py
    │   ├── train.png
    │   ├── train.py
    │   ├── true.png
    │   └── visualize.py
    ├── deep_sort.py
    └── sort
    │   ├── README.md
    │   ├── __init__.py
    │   ├── detection.py
    │   ├── iou_matching.py
    │   ├── kalman_filter.py
    │   ├── linear_assignment.py
    │   ├── nn_matching.py
    │   ├── preprocessing.py
    │   ├── track.py
    │   └── tracker.py
├── detector
    ├── FasterRCNN
    │   └── tips.txt
    ├── YOLO3
    │   ├── __init__.py
    │   ├── cfg.py
    │   ├── cfg
    │   │   ├── coco.data
    │   │   ├── coco.names
    │   │   ├── darknet19_448.cfg
    │   │   ├── tiny-yolo-voc.cfg
    │   │   ├── tiny-yolo.cfg
    │   │   ├── voc.data
    │   │   ├── voc.names
    │   │   ├── voc_gaotie.data
    │   │   ├── yolo-voc.cfg
    │   │   ├── yolo.cfg
    │   │   ├── yolo_v3.cfg
    │   │   └── yolov3-tiny.cfg
    │   ├── darknet.py
    │   ├── detect.py
    │   ├── detector.py
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── nms.py
    │   │   └── python_nms.py
    │   ├── region_layer.py
    │   ├── weight
    │   │   └── tips.txt
    │   ├── yolo_layer.py
    │   └── yolo_utils.py
    └── __init__.py
├── requirements.txt
├── utils
    ├── __init__.py
    ├── dataset_reconstruct.py
    ├── dataset_split.py
    ├── draw_bbox.py
    ├── format_factory.py
    └── parse_config.py
├── web
    ├── README.md
    ├── __init__.py
    ├── db.sqlite3
    ├── manage.py
    ├── static
    │   ├── css
    │   │   └── video-js.min.css
    │   ├── images
    │   │   └── bg.png
    │   └── js
    │   │   └── video.min.js
    ├── templates
    │   ├── show_images.html
    │   ├── show_video.html
    │   └── upload.html
    └── web
    │   ├── __init__.py
    │   ├── asgi.py
    │   ├── settings.py
    │   ├── urls.py
    │   ├── views.py
    │   └── wsgi.py
├── yolo3_deepsort.py
└── yolo3_deepsort_camera.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Folders
2 | __pycache__/
3 | build/
4 | *.egg-info
5 | .idea/
6 | media/
7 | web/static/images/
8 | recording/
9 | 
10 | # Files
11 | *.weights
12 | *.t7
13 | *.mp4
14 | *.avi
15 | *.so
16 | *.zip
17 | *.jpg
18 | *.gif
--------------------------------------------------------------------------------
/README-zh.md:
--------------------------------------------------------------------------------
1 | # 基于pytorch实现DeepSORT多目标跟踪
2 | > 该仓库参考 [ZQPei的项目](https://github.com/ZQPei/deep_sort_pytorch),我在其基础上进行了一些优化。
3 | 
4 | ## 环境配置
5 | 基于Python3.6并在虚拟环境下安装如下几个核心包即可,具体见[requirements](./requirements.txt)文件。
6 | 
7 | - pytorch>=1.0
8 | - numpy
9 | - scipy
10 | 
11 | ## 运行脚本
12 | 使用如下命令对视频进行跟踪。
13 | 
14 | `python yolo3_deepsort.py --video_path ../data/TownCenter.avi`
15 | 
16 | 使用如下命令,打开默认摄像头,实时跟踪。
17 | 
18 | `python yolo3_deepsort_camera.py`
19 | 
20 | 
21 | ## 跟踪结果
22 | 在示例视频上跟踪效果如下图。
23 | 
24 | ![](./assets/demo.gif)
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepSORT multi-object tracking implementation in PyTorch
2 | > This project refers to [ZQPei's repo](https://github.com/ZQPei/deep_sort_pytorch); I made some optimizations on top of it.
3 | 
4 | 
5 | ## environment
6 | Install the following core packages (Python 3.6, virtual environment recommended); see [requirements](./requirements.txt) for details.
7 | 
8 | - pytorch>=1.0
9 | - numpy
10 | - scipy
11 | 
12 | 
13 | ## run demo
14 | Use the following command in a terminal to run tracking on a video file.
15 | 
16 | `python yolo3_deepsort.py --video_path ../data/TownCenter.avi`
17 | 
18 | Use the following command in a terminal to run real-time tracking on the default camera.
19 | 
20 | `python yolo3_deepsort_camera.py`
21 | 
22 | ## results
23 | A sample tracking result on the demo video is shown below.
24 | 
25 | ![](./assets/demo.gif)
--------------------------------------------------------------------------------
/assets/algorithm.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/algorithm.vsdx
--------------------------------------------------------------------------------
/assets/cascade.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/cascade.png
--------------------------------------------------------------------------------
/assets/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/loss.png
--------------------------------------------------------------------------------
/assets/net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/net.png
--------------------------------------------------------------------------------
/assets/net_arch.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/net_arch.vsdx
--------------------------------------------------------------------------------
/assets/web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/web.png
--------------------------------------------------------------------------------
/assets/web_rst.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/web_rst.png
--------------------------------------------------------------------------------
/assets/算法流程.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/算法流程.png
--------------------------------------------------------------------------------
/configs/deepsort.yml:
--------------------------------------------------------------------------------
1 | DEEPSORT:
2 |   # 预训练reid模型参数文件
3 |   REID_CKPT: "./deepsort/deep/checkpoint/ckpt1.t7"
4 |   # 最大余弦距离
5 |   MAX_DIST: 0.2
6 |   # 确认检测结果最小置信度
7 |   MIN_CONFIDENCE: 0.3
8 |   # NMS最大IOU
9 |   NMS_MAX_OVERLAP: 0.5
10 |   # IOU匹配时最大IOU距离
11 |   MAX_IOU_DISTANCE: 0.7
12 |   # 最大生命周期
13 |   MAX_AGE: 50
14 |   # 确认轨迹所需帧数
15 |   N_INIT: 3
16 |   # 临时特征向量容量,可减少以保证运行速度
17 |   NN_BUDGET: 100
18 | 
--------------------------------------------------------------------------------
/configs/yolov3.yml:
--------------------------------------------------------------------------------
1 | YOLOV3:
2 |   CFG: "./detector/YOLO3/cfg/yolo_v3.cfg"
3 |   WEIGHT: "./detector/YOLO3/weight/yolov3.weights"
4 |   CLASS_NAMES:
"./detector/YOLO3/cfg/coco.names" 5 | SCORE_THRESH: 0.5 6 | NMS_THRESH: 0.4 7 | -------------------------------------------------------------------------------- /data/tips.txt: -------------------------------------------------------------------------------- 1 | demo videos folder -------------------------------------------------------------------------------- /deepsort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deepsort/deep/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_extractor import Net -------------------------------------------------------------------------------- /deepsort/deep/false.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/false.png -------------------------------------------------------------------------------- /deepsort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .model import Net 7 | 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True, num_classes=751) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | print("Loaded weights from {}.".format(model_path)) 16 | self.net.to(self.device) 17 | self.size = (64, 128) 18 | self.norm = transforms.Compose([ 19 | transforms.ToTensor(), 20 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 21 | ]) 22 | 23 | def _preprocess(self, im_crops): 24 | """ 25 | 特征提取器的图像预处理 26 | 归一到0-1 27 | 调整图像大小 28 | 图像标准化 29 | Torch张量化 30 | :param im_crops: 一个batch的RGB图像(单图需要放在列表中) 31 | :return: 32 | """ 33 | def _resize(im, size): 34 | return cv2.resize(im.astype(np.float32)/255., size) 35 | 36 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 37 | return im_batch 38 | 39 | def __call__(self, im_crops): 40 | im_batch = self._preprocess(im_crops) 41 | with torch.no_grad(): 42 | im_batch = im_batch.to(self.device) 43 | features = self.net(im_batch) 44 | return features.cpu().numpy() 45 | 46 | 47 | def test(): 48 | def cosine(a, b, data_is_normalized=False): 49 | if not data_is_normalized: 50 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 51 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 52 | return np.dot(a, b.T) 53 | 54 | img1 = cv2.cvtColor(cv2.resize(cv2.imread("1.jpg"), (64, 128)), cv2.COLOR_BGR2RGB) 55 | img2 = cv2.cvtColor(cv2.resize(cv2.imread("2.jpg"), (64, 128)), 
cv2.COLOR_BGR2RGB) 56 | img3 = cv2.cvtColor(cv2.resize(cv2.imread("3.jpg"), (64, 128)), cv2.COLOR_BGR2RGB) 57 | imgs = [img1, img2, img3] 58 | extractor = Extractor("checkpoint/ckpt1.t7") 59 | feature = extractor(imgs) 60 | a = feature[0] 61 | b = feature[1] 62 | c = feature[2] 63 | import matplotlib.pyplot as plt 64 | fig = plt.figure() 65 | plt.subplot(1, 2, 1) 66 | plt.imshow(img1) 67 | plt.title(" ") 68 | plt.subplot(1, 2, 2) 69 | plt.imshow(img2) 70 | fig.suptitle("Cosine similarity:" + str(cosine(a.reshape(1, -1), b.reshape(1, -1), True)[0][0]) + "\n") 71 | plt.title(" ") 72 | plt.savefig("true.png") 73 | plt.show() 74 | 75 | import matplotlib.pyplot as plt 76 | fig = plt.figure() 77 | plt.subplot(1, 2, 1) 78 | plt.imshow(img1) 79 | plt.title(" ") 80 | plt.subplot(1, 2, 2) 81 | plt.imshow(img3) 82 | fig.suptitle("Cosine similarity:" + str(cosine(a.reshape(1, -1), c.reshape(1, -1), True)[0][0]) + "\n") 83 | plt.title(" ") 84 | plt.savefig("false.png") 85 | plt.show() 86 | 87 | 88 | if __name__ == '__main__': 89 | # 测试提取器 90 | test() 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /deepsort/deep/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/loss.png -------------------------------------------------------------------------------- /deepsort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, in_channels, out_channels, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=2, padding=1, bias=False) 12 | else: 13 | self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(out_channels) 15 | self.relu = nn.ReLU(True) 16 | self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(out_channels) 18 | if is_downsample: 19 | self.downsample = nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, 1, stride=2, bias=False), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | elif in_channels != out_channels: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(in_channels, out_channels, 1, stride=1, bias=False), 26 | nn.BatchNorm2d(out_channels) 27 | ) 28 | self.is_downsample = True 29 | 30 | def forward(self, x): 31 | y = self.conv1(x) 32 | y = self.bn1(y) 33 | y = self.relu(y) 34 | y = self.conv2(y) 35 | y = self.bn2(y) 36 | if self.is_downsample: 37 | x = self.downsample(x) 38 | return F.relu(x.add(y), True) # 残差连接 39 | 40 | 41 | def make_layers(in_channels, out_channels, repeat_times, is_downsample=False): 42 | blocks = [] 43 | for i in range(repeat_times): 44 | if i == 0: 45 | blocks += [BasicBlock(in_channels, out_channels, is_downsample=is_downsample), ] 46 | else: 47 | blocks += [BasicBlock(out_channels, out_channels), ] 48 | return nn.Sequential(*blocks) 49 | 50 | 51 | class Net(nn.Module): 52 | def __init__(self, num_classes=1261, reid=False): 53 | """ 54 | 55 | :param num_classes: 分类器层输出的类别数目,Mars数据集训练集加测试集共1261类 56 | :param reid: 是否为reid模式,若为True,直接返回特征向量而不做分类 57 | """ 58 | super(Net, self).__init__() 59 | # 3 128 64 60 | self.conv = 
nn.Sequential( 61 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 62 | nn.BatchNorm2d(64), 63 | nn.ReLU(inplace=True), 64 | nn.MaxPool2d(3, 2, padding=1), 65 | ) 66 | # 32 64 32 67 | self.layer1 = make_layers(64, 64, 2, False) 68 | # 32 64 32 69 | self.layer2 = make_layers(64, 128, 2, True) 70 | # 64 32 16 71 | self.layer3 = make_layers(128, 256, 2, True) 72 | # 128 16 8 73 | self.layer4 = make_layers(256, 512, 2, True) 74 | # 256 8 4 75 | self.avgpool = nn.AvgPool2d((8, 4), 1) 76 | # 256 1 1 77 | self.reid = reid 78 | self.classifier = nn.Sequential( 79 | nn.Linear(512, 256), 80 | nn.BatchNorm1d(256), 81 | nn.ReLU(inplace=True), 82 | nn.Dropout(), 83 | nn.Linear(256, num_classes), 84 | ) 85 | 86 | def forward(self, x): 87 | x = self.conv(x) 88 | x = self.layer1(x) 89 | x = self.layer2(x) 90 | x = self.layer3(x) 91 | x = self.layer4(x) 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0), -1) 94 | # 256 95 | if self.reid: 96 | x = x / x.norm(p=2, dim=1, keepdim=True) # 张量单位化 97 | return x 98 | # 分类器 99 | x = self.classifier(x) 100 | return x 101 | 102 | 103 | if __name__ == '__main__': 104 | net = Net(reid=True) 105 | print(net) 106 | x = torch.randn(4, 3, 128, 64) 107 | y = net(x) 108 | print(y.shape) 109 | -------------------------------------------------------------------------------- /deepsort/deep/train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/train.png -------------------------------------------------------------------------------- /deepsort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import matplotlib.pyplot as plt 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torchvision 8 | 9 | from model import Net 10 | 11 | # 命令行参数配置 12 | parser = argparse.ArgumentParser(description="Train on Mars") 13 | parser.add_argument("--data-dir", default='/SISDC_GPFS/Home_SE/jiangm-jnu/xiaf-jnu/zhouchen/dataset/MARS-generated/', type=str) # 修改为自己的数据集目录 14 | parser.add_argument("--gpu-id", default=0, type=int) 15 | parser.add_argument("--lr", default=0.1, type=float) # 初始学习率 16 | parser.add_argument("--interval", '-i', default=20, type=int) # 日志输出间隔 17 | parser.add_argument('--resume', '-r', action='store_true', default=False) # 使用预训练模型 18 | args = parser.parse_args() 19 | 20 | # 确定训练设备 21 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu" 22 | 23 | if torch.cuda.is_available(): 24 | cudnn.benchmark = True # 对固定的网络结构优化 25 | 26 | # 数据载入 27 | root = args.data_dir 28 | train_dir = os.path.join(root, "bbox_train") 29 | test_dir = os.path.join(root, "bbox_test") 30 | # 图像预处理 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.Resize((128, 64)), # 如果采用Market数据集这一步可以删去,Mars必须要这一步 33 | torchvision.transforms.RandomCrop((128, 64), padding=4), 34 | torchvision.transforms.RandomHorizontalFlip(), 35 | torchvision.transforms.ToTensor(), 36 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 37 | ]) 38 | transform_test = torchvision.transforms.Compose([ 39 | torchvision.transforms.Resize((128, 64)), 40 | torchvision.transforms.ToTensor(), 41 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 42 | ]) 43 | 44 | trainloader = torch.utils.data.DataLoader( 45 | torchvision.datasets.ImageFolder(train_dir, 
transform=transform_train), 46 | batch_size=128, shuffle=True 47 | ) 48 | 49 | testloader = torch.utils.data.DataLoader( 50 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 51 | batch_size=128, shuffle=True 52 | ) 53 | 54 | num_classes = len(trainloader.dataset.classes) 55 | 56 | # net definition 57 | start_epoch = 0 58 | net = Net(num_classes=num_classes) 59 | 60 | if args.resume: 61 | # 是否使用预训练参数 62 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 63 | print('Loaded pretrained weights from checkpoint file') 64 | checkpoint = torch.load("./checkpoint/ckpt.t7") # 该字典含有net_dict,acc,epoch三个键 65 | net_dict = checkpoint['net_dict'] 66 | net.load_state_dict(net_dict) 67 | 68 | net.to(device) 69 | 70 | # 使用交叉熵和SGD 71 | criterion = torch.nn.CrossEntropyLoss() 72 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 73 | best_acc = 0.0 74 | 75 | 76 | # train function for each epoch 77 | def train(epoch): 78 | print("Epoch{}".format(epoch + 1)) 79 | print("Training...") 80 | net.train() 81 | training_loss = 0. 82 | train_loss = 0. 83 | correct = 0 84 | total = 0 85 | interval = args.interval 86 | start = time.time() 87 | for idx, (inputs, labels) in enumerate(trainloader): 88 | # 前向传播 89 | inputs, labels = inputs.to(device), labels.to(device) 90 | outputs = net(inputs) 91 | loss = criterion(outputs, labels) 92 | 93 | # 反向传播 94 | optimizer.zero_grad() 95 | loss.backward() 96 | optimizer.step() 97 | 98 | # 计算指标 99 | training_loss += loss.item() 100 | train_loss += loss.item() 101 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 102 | total += labels.size(0) 103 | 104 | if (idx + 1) % interval == 0: 105 | # 固定step输出一次信息 106 | end = time.time() 107 | print("[Progress:{:.1f}%] time:{:.2f}s Loss:{:.5f} Acc:{:.3f}%".format( 108 | 100. * (idx + 1) / len(trainloader), end - start, training_loss / interval, 109 | 100. * correct / total 110 | )) 111 | training_loss = 0.0 112 | start = time.time() 113 | 114 | return train_loss / len(trainloader), 1. - correct / total 115 | 116 | 117 | def test(epoch): 118 | global best_acc 119 | print("Epoch{}".format(epoch + 1)) 120 | print("Testing...") 121 | net.eval() 122 | test_loss = 0. 123 | correct = 0 124 | total = 0 125 | start = time.time() 126 | with torch.no_grad(): 127 | for idx, (inputs, labels) in enumerate(testloader): 128 | inputs, labels = inputs.to(device), labels.to(device) 129 | outputs = net(inputs) 130 | loss = criterion(outputs, labels) 131 | 132 | test_loss += loss.item() 133 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 134 | total += labels.size(0) 135 | 136 | end = time.time() 137 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 138 | 100. * (idx + 1) / len(testloader), end - start, test_loss / len(testloader), correct, total, 139 | 100. * correct / total 140 | )) 141 | 142 | # 保存训练参数 143 | acc = 100. * correct / total 144 | if acc > best_acc: 145 | # 始终保留最好的参数,如果过拟合,则不保留参数 146 | best_acc = acc 147 | print("Saving parameters to checkpoint/ckpt.t7") 148 | checkpoint = { 149 | 'net_dict': net.state_dict(), 150 | } 151 | if not os.path.isdir('checkpoint'): 152 | os.mkdir('checkpoint') 153 | torch.save(checkpoint, './checkpoint/ckpt.t7') 154 | 155 | return test_loss / len(testloader), 1. 
- correct / total 156 | 157 | 158 | # 绘制训练曲线 159 | x_epoch = [] 160 | record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} 161 | fig = plt.figure(figsize=(18, 6)) 162 | ax0 = fig.add_subplot(121, title="loss") 163 | ax1 = fig.add_subplot(122, title="err") 164 | 165 | 166 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 167 | global record 168 | record['train_loss'].append(train_loss) 169 | record['train_err'].append(train_err) 170 | record['test_loss'].append(test_loss) 171 | record['test_err'].append(test_err) 172 | 173 | x_epoch.append(epoch) 174 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='training') 175 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='validation') 176 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='training') 177 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='validation') 178 | if epoch == 0: 179 | ax0.legend() 180 | ax1.legend() 181 | fig.savefig("train.png") 182 | 183 | 184 | def lr_decay(): 185 | # 设置学习率衰减 186 | global optimizer 187 | for params in optimizer.param_groups: 188 | params['lr'] *= 0.1 189 | lr = params['lr'] 190 | print("Learning rate adjusted to {}".format(lr)) 191 | 192 | 193 | def main(): 194 | # 训练50轮 195 | for epoch in range(50): 196 | train_loss, train_err = train(epoch) 197 | test_loss, test_err = test(epoch) 198 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 199 | if (epoch + 1) % 20 == 0: 200 | lr_decay() 201 | 202 | 203 | if __name__ == '__main__': 204 | main() 205 | -------------------------------------------------------------------------------- /deepsort/deep/true.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/true.png -------------------------------------------------------------------------------- /deepsort/deep/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/5/21 4 | Desc: desc 5 | """ 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def parse_txt(filepath="log_train.txt"): 10 | loss_list = [] 11 | with open(filepath, 'r', encoding="utf8") as f: 12 | line = f.readline().strip() 13 | while line: 14 | loss = float(line.split(" ")[2].split(":")[-1]) 15 | loss_list.append(loss) 16 | line = f.readline().strip() 17 | return loss_list 18 | 19 | 20 | def draw_his(loss): 21 | plt.figure() 22 | plt.plot(list(range(len(loss))), loss) 23 | plt.savefig('loss.png') 24 | plt.show() 25 | 26 | 27 | if __name__ == '__main__': 28 | rst = parse_txt() 29 | draw_his(rst) -------------------------------------------------------------------------------- /deepsort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from .sort.tracker import Tracker 9 | 10 | __all__ = ['DeepSort'] 11 | 12 | 13 | class DeepSort(object): 14 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, 15 | max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 19 | 
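        # 外观度量使用最近邻余弦距离:max_dist 为匹配阈值,nn_budget 限制每个目标保留的特征数量(见 sort/nn_matching.py)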
metric = NearestNeighborDistanceMetric("cosine", max_dist, nn_budget) 20 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 21 | self.height = None 22 | self.width = None 23 | 24 | def update(self, bbox_xywh, confidences, ori_img): 25 | self.height, self.width = ori_img.shape[:2] 26 | features = self._get_features(bbox_xywh, ori_img) # 提取深度特征 27 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 28 | detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate(confidences) if 29 | conf > self.min_confidence] # 只保留大于最小置信度的检测框 30 | 31 | # 使用非极大抑制消除部分bbox 32 | boxes = np.array([d.tlwh for d in detections]) 33 | scores = np.array([d.confidence for d in detections]) 34 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 35 | detections = [detections[i] for i in indices] 36 | 37 | # update tracker 38 | self.tracker.predict() 39 | self.tracker.update(detections) 40 | 41 | # output bbox identities 42 | outputs = [] 43 | for track in self.tracker.tracks: 44 | if not track.is_confirmed() or track.time_since_update > 1: 45 | continue 46 | box = track.to_tlwh() 47 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 48 | track_id = track.track_id 49 | outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 50 | if len(outputs) > 0: 51 | outputs = np.stack(outputs, axis=0) 52 | return outputs 53 | 54 | @staticmethod 55 | def _xywh_to_tlwh(bbox_xywh): 56 | """ 57 | 转化(xc, yc, w, h)为(xtl,ytl,w, h) 58 | """ 59 | if isinstance(bbox_xywh, np.ndarray): 60 | bbox_tlwh = bbox_xywh.copy() 61 | elif isinstance(bbox_xywh, torch.Tensor): 62 | bbox_tlwh = bbox_xywh.clone() 63 | else: 64 | bbox_tlwh = None 65 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 66 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 
67 | return bbox_tlwh 68 | 69 | def _xywh_to_xyxy(self, bbox_xywh): 70 | x, y, w, h = bbox_xywh 71 | x1 = max(int(x - w / 2), 0) 72 | x2 = min(int(x + w / 2), self.width - 1) 73 | y1 = max(int(y - h / 2), 0) 74 | y2 = min(int(y + h / 2), self.height - 1) 75 | return x1, y1, x2, y2 76 | 77 | def _tlwh_to_xyxy(self, bbox_tlwh): 78 | x, y, w, h = bbox_tlwh 79 | x1 = max(int(x), 0) 80 | x2 = min(int(x + w), self.width - 1) 81 | y1 = max(int(y), 0) 82 | y2 = min(int(y + h), self.height - 1) 83 | return x1, y1, x2, y2 84 | 85 | def _get_features(self, bbox_xywh, ori_img): 86 | """ 87 | 获得图像的检测块的深度特征 88 | """ 89 | im_crops = [] 90 | for box in bbox_xywh: 91 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 92 | im = ori_img[y1:y2, x1:x2] 93 | im_crops.append(im) 94 | if im_crops: 95 | features = self.extractor(im_crops) 96 | else: 97 | features = np.array([]) 98 | return features 99 | -------------------------------------------------------------------------------- /deepsort/sort/README.md: -------------------------------------------------------------------------------- 1 | # SORT跟踪算法实现 2 | 3 | 本部分代码参考官方代码的实现 -------------------------------------------------------------------------------- /deepsort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/sort/__init__.py -------------------------------------------------------------------------------- /deepsort/sort/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Detection(object): 5 | """ 6 | 检测基类,不论使用什么检测框架,最后由该类处理 7 | """ 8 | 9 | def __init__(self, tlwh, confidence, feature): 10 | """ 11 | 12 | :param tlwh: bbox (x, y, w, h) 13 | :param confidence: 置信度 14 | :param feature: 特征向量 15 | """ 16 | self.tlwh = np.asarray(tlwh, dtype=np.float) 17 | self.confidence = float(confidence) 18 | self.feature = np.asarray(feature, dtype=np.float32) 19 | 20 | def to_tlbr(self): 21 | """ 22 | 转换bbox为(top left bottom right)的格式即(minx miny maxx maxy)_ 23 | :return: 24 | """ 25 | ret = self.tlwh.copy() 26 | ret[2:] += ret[:2] 27 | return ret 28 | 29 | def to_xyah(self): 30 | """ 31 | 转换bbox为(center x, center y, aspect ration, height) 32 | :return: 33 | """ 34 | ret = self.tlwh.copy() 35 | ret[:2] += ret[2:] / 2 36 | ret[2] /= ret[3] 37 | return ret 38 | -------------------------------------------------------------------------------- /deepsort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . 
import linear_assignment 3 | 4 | 5 | def iou(bbox, candidates): 6 | """ 7 | 计算IOU 8 | :param bbox: bbox like (top left x,top left y, width, height) 9 | :param candidates:候选跟踪框矩阵(每行一个) 格式同bbox 10 | :return: 11 | """ 12 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 13 | candidates_tl = candidates[:, :2] 14 | candidates_br = candidates[:, :2] + candidates[:, 2:] 15 | 16 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 17 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 18 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 19 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 20 | wh = np.maximum(0., br - tl) 21 | 22 | area_intersection = wh.prod(axis=1) 23 | area_bbox = bbox[2:].prod() 24 | area_candidates = candidates[:, 2:].prod(axis=1) 25 | return area_intersection / (area_bbox + area_candidates - area_intersection) 26 | 27 | 28 | def iou_cost(tracks, detections, track_indices=None, 29 | detection_indices=None): 30 | """ 31 | IOU距离 32 | :param tracks: 一个列表的轨迹 33 | :param detections : 一个列表的检测 34 | :param track_indices : 一个该被匹配的轨迹下标列表 35 | :param detection_indices : 一个该被匹配的检测下标列表 36 | :return: 返回代价矩阵,维度(len(track_indices), len(detection_indices)) 37 | 每个元素(i,j)1 - iou(tracks[track_indices[i]], detections[detection_indices[j]]) 38 | """ 39 | if track_indices is None: 40 | track_indices = np.arange(len(tracks)) 41 | if detection_indices is None: 42 | detection_indices = np.arange(len(detections)) 43 | 44 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 45 | for row, track_idx in enumerate(track_indices): 46 | if tracks[track_idx].time_since_update > 1: 47 | cost_matrix[row, :] = linear_assignment.INFTY_COST 48 | continue 49 | 50 | bbox = tracks[track_idx].to_tlwh() 51 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 52 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 53 | return cost_matrix 54 | -------------------------------------------------------------------------------- /deepsort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | 本模块参考SORT论文源码实现的卡尔曼滤波 3 | """ 4 | import numpy as np 5 | import scipy.linalg 6 | 7 | # 具有N个自由度的卡方分布的0.95分位数的表,取自matlab中chi2inv函数,作为Mahalanobis阈值 8 | chi2inv95 = { 9 | 1: 3.8415, 10 | 2: 5.9915, 11 | 3: 7.8147, 12 | 4: 9.4877, 13 | 5: 11.070, 14 | 6: 12.592, 15 | 7: 14.067, 16 | 8: 15.507, 17 | 9: 16.919} 18 | 19 | 20 | class KalmanFilter(object): 21 | """ 22 | 图像空间预测bbox的卡尔曼滤波 23 | 8维空间 24 | 目标移动按照匀速模型,bbox位置作为状态空间的直接观测(线性观测模型)。 25 | """ 26 | 27 | def __init__(self): 28 | ndim, dt = 4, 1. 29 | 30 | # 参数初始化 31 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 32 | for i in range(ndim): 33 | self._motion_mat[i, ndim + i] = dt 34 | self._update_mat = np.eye(ndim, 2 * ndim) 35 | 36 | # 模型不确定性控制权重 37 | self._std_weight_position = 1. / 20 38 | self._std_weight_velocity = 1. 
/ 160 39 | 40 | def initiate(self, measurement): 41 | """ 42 | 测量中创建跟踪,本项目指的是从检测结果中创建 43 | 初始化均值和协方差 44 | 检测格式为(cx,cy,a,h) 45 | 返回均值(8维)和协方差(8*8维),未观测到的速度初始化为0 46 | """ 47 | mean_pos = measurement 48 | mean_vel = np.zeros_like(mean_pos) 49 | mean = np.r_[mean_pos, mean_vel] 50 | 51 | std = [ 52 | 2 * self._std_weight_position * measurement[3], 53 | 2 * self._std_weight_position * measurement[3], 54 | 1e-2, 55 | 2 * self._std_weight_position * measurement[3], 56 | 10 * self._std_weight_velocity * measurement[3], 57 | 10 * self._std_weight_velocity * measurement[3], 58 | 1e-5, 59 | 10 * self._std_weight_velocity * measurement[3]] 60 | covariance = np.diag(np.square(std)) 61 | return mean, covariance 62 | 63 | def predict(self, mean, covariance): 64 | """ 65 | 66 | Parameters 67 | ---------- 68 | mean 上一帧目标状态的均值向量(8维) 69 | covariance 上一帧目标状态的协方差矩阵(8*8维) 70 | 71 | Returns 72 | 预测状态的相应均值和协方差 73 | ------- 74 | 75 | """ 76 | std_pos = [ 77 | self._std_weight_position * mean[3], 78 | self._std_weight_position * mean[3], 79 | 1e-2, 80 | self._std_weight_position * mean[3]] 81 | std_vel = [ 82 | self._std_weight_velocity * mean[3], 83 | self._std_weight_velocity * mean[3], 84 | 1e-5, 85 | self._std_weight_velocity * mean[3]] 86 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 87 | 88 | mean = np.dot(self._motion_mat, mean) 89 | covariance = np.linalg.multi_dot(( 90 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 91 | 92 | return mean, covariance 93 | 94 | def project(self, mean, covariance): 95 | """ 96 | 投影状态分布到测量空间 97 | Parameters 98 | ---------- 99 | mean 状态的均值 100 | covariance 状态的协方差 101 | 102 | Returns 103 | 给定状态估计的均值和方差 104 | ------- 105 | 106 | """ 107 | std = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-1, 111 | self._std_weight_position * mean[3]] 112 | innovation_cov = np.diag(np.square(std)) 113 | 114 | mean = np.dot(self._update_mat, mean) 115 | covariance = np.linalg.multi_dot(( 116 | self._update_mat, covariance, self._update_mat.T)) 117 | return mean, covariance + innovation_cov 118 | 119 | def update(self, mean, covariance, measurement): 120 | """ 121 | 状态更新 122 | Parameters 123 | ---------- 124 | mean 125 | covariance 126 | measurement 127 | 128 | Returns 129 | ------- 130 | 131 | """ 132 | projected_mean, projected_cov = self.project(mean, covariance) 133 | 134 | chol_factor, lower = scipy.linalg.cho_factor( 135 | projected_cov, lower=True, check_finite=False) 136 | # 计算卡尔曼增益 137 | kalman_gain = scipy.linalg.cho_solve( 138 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 139 | check_finite=False).T 140 | innovation = measurement - projected_mean 141 | 142 | new_mean = mean + np.dot(innovation, kalman_gain.T) 143 | new_covariance = covariance - np.linalg.multi_dot(( 144 | kalman_gain, projected_cov, kalman_gain.T)) 145 | return new_mean, new_covariance 146 | 147 | def gating_distance(self, mean, covariance, measurements, 148 | only_position=False): 149 | 150 | 151 | mean, covariance = self.project(mean, covariance) 152 | if only_position: 153 | mean, covariance = mean[:2], covariance[:2, :2] 154 | measurements = measurements[:, :2] 155 | 156 | cholesky_factor = np.linalg.cholesky(covariance) # Cholesky分解 157 | d = measurements - mean 158 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) 159 | squared_maha = np.sum(z * z, axis=0) 160 | return squared_maha 161 | 
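# 以下为一个最小的调用示例(仅作演示,观测数值为假设的检测框),
# 展示跟踪中卡尔曼滤波的基本流程:initiate -> predict -> update,
# 观测格式为 (cx, cy, a, h),其中 a 为宽高比。
if __name__ == '__main__':
    kf = KalmanFilter()
    measurement = np.array([320., 240., 0.5, 160.])       # 假设第一帧的检测框
    mean, covariance = kf.initiate(measurement)            # 由检测初始化8维状态均值与协方差
    mean, covariance = kf.predict(mean, covariance)        # 预测下一帧的状态分布
    new_measurement = np.array([324., 242., 0.5, 162.])    # 假设下一帧匹配到的检测框
    mean, covariance = kf.update(mean, covariance, new_measurement)  # 用观测校正预测
    print(mean[:4])                                        # 校正后的 (cx, cy, a, h)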
-------------------------------------------------------------------------------- /deepsort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import linear_sum_assignment as linear_assignment 3 | from . import kalman_filter 4 | 5 | INFTY_COST = 1e+5 6 | 7 | 8 | def min_cost_matching(distance_metric, max_distance, tracks, detections, track_indices=None, detection_indices=None): 9 | """ 10 | 使用匈牙利算法解决线性分配问题 11 | Parameters 12 | ---------- 13 | distance_metric 轨迹集检测和他们的下标 14 | max_distance 最大距离阈值,大于此距离的关联无效 15 | tracks 16 | detections 17 | track_indices 18 | detection_indices 19 | 20 | Returns 21 | 匹配上的轨迹和检测 22 | 未匹配的轨迹 23 | 未匹配的检测 24 | ------- 25 | 26 | """ 27 | if track_indices is None: 28 | track_indices = np.arange(len(tracks)) 29 | if detection_indices is None: 30 | detection_indices = np.arange(len(detections)) 31 | 32 | if len(detection_indices) == 0 or len(track_indices) == 0: 33 | return [], track_indices, detection_indices # Nothing to match. 34 | 35 | cost_matrix = distance_metric( 36 | tracks, detections, track_indices, detection_indices) 37 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 38 | 39 | row_indices, col_indices = linear_assignment(cost_matrix) 40 | 41 | matches, unmatched_tracks, unmatched_detections = [], [], [] 42 | for col, detection_idx in enumerate(detection_indices): 43 | if col not in col_indices: 44 | unmatched_detections.append(detection_idx) 45 | for row, track_idx in enumerate(track_indices): 46 | if row not in row_indices: 47 | unmatched_tracks.append(track_idx) 48 | for row, col in zip(row_indices, col_indices): 49 | track_idx = track_indices[row] 50 | detection_idx = detection_indices[col] 51 | if cost_matrix[row, col] > max_distance: 52 | # 如果组合后的cost大于阈值还是认为不匹配,移到不匹配列表中 53 | unmatched_tracks.append(track_idx) 54 | unmatched_detections.append(detection_idx) 55 | else: 56 | matches.append((track_idx, detection_idx)) 57 | return matches, unmatched_tracks, unmatched_detections 58 | 59 | 60 | def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections, track_indices=None, 61 | detection_indices=None): 62 | """ 63 | 级联匹配 64 | 参数和返回类似上面 65 | Parameters 66 | ---------- 67 | distance_metric 68 | max_distance 69 | cascade_depth 70 | tracks 71 | detections 72 | track_indices 73 | detection_indices 74 | 75 | Returns 76 | ------- 77 | 78 | """ 79 | 80 | if track_indices is None: 81 | track_indices = list(range(len(tracks))) 82 | if detection_indices is None: 83 | detection_indices = list(range(len(detections))) 84 | 85 | unmatched_detections = detection_indices 86 | matches = [] 87 | for level in range(cascade_depth): 88 | if len(unmatched_detections) == 0: # No detections left 89 | break 90 | 91 | track_indices_l = [ 92 | k for k in track_indices 93 | if tracks[k].time_since_update == 1 + level 94 | ] 95 | if len(track_indices_l) == 0: # Nothing to match at this level 96 | continue 97 | 98 | matches_l, _, unmatched_detections = min_cost_matching(distance_metric, max_distance, tracks, detections, 99 | track_indices_l, unmatched_detections) 100 | matches += matches_l 101 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 102 | return matches, unmatched_tracks, unmatched_detections 103 | 104 | 105 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices, 106 | gated_cost=INFTY_COST, only_position=False): 107 | """ 108 | 使用马氏距离进一步筛选代价矩阵 109 | 
门控矩阵的作用就是通过计算卡尔曼滤波的状态分布和测量值之间的距离对代价矩阵进行限制。 110 | 代价矩阵中的距离是Track和Detection之间的表观相似度,假如一个轨迹要去匹配两个表观特征非常相似的Detection,这样就很容易出错, 111 | 但是这个时候分别让两个Detection计算与这个轨迹的马氏距离, 112 | 并使用一个阈值gating_threshold进行限制,所以就可以将马氏距离较远的那个Detection区分开,可以降低错误的匹配。 113 | """ 114 | gating_dim = 2 if only_position else 4 115 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 116 | measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) 117 | for row, track_idx in enumerate(track_indices): 118 | track = tracks[track_idx] 119 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position) 120 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 121 | return cost_matrix 122 | -------------------------------------------------------------------------------- /deepsort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def _pdist(a, b): 5 | """ 6 | 计算欧式距离 7 | Parameters 8 | """ 9 | a, b = np.asarray(a), np.asarray(b) 10 | if len(a) == 0 or len(b) == 0: 11 | return np.zeros((len(a), len(b))) 12 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 13 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 14 | r2 = np.clip(r2, 0., float(np.inf)) 15 | return r2 16 | 17 | 18 | def _cosine_distance(a, b, data_is_normalized=False): 19 | """ 20 | 计算余弦距离 21 | """ 22 | if not data_is_normalized: 23 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 24 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 25 | return 1. - np.dot(a, b.T) 26 | 27 | 28 | def _nn_euclidean_distance(x, y): 29 | distances = _pdist(x, y) 30 | return np.maximum(0.0, distances.min(axis=0)) 31 | 32 | 33 | def _nn_cosine_distance(x, y): 34 | distances = _cosine_distance(x, y) 35 | return distances.min(axis=0) 36 | 37 | 38 | class NearestNeighborDistanceMetric(object): 39 | 40 | def __init__(self, metric, matching_threshold, budget=None): 41 | """ 42 | metric "euclidean" or "cosine" 43 | matching_threshold 匹配阈值,大于此认为无效匹配 44 | budget 如果不是None,则将每个类的样本最多固定为这个数字。当达到budget大小时,删除最老的样本。 45 | """ 46 | 47 | if metric == "euclidean": 48 | self._metric = _nn_euclidean_distance 49 | elif metric == "cosine": 50 | self._metric = _nn_cosine_distance 51 | else: 52 | raise ValueError("Invalid metric; must be either 'euclidean' or 'cosine'") 53 | self.matching_threshold = matching_threshold 54 | self.budget = budget 55 | self.samples = {} 56 | 57 | def partial_fit(self, features, targets, active_targets): 58 | """ 59 | 使用新数据更新距离指标 60 | ---------- 61 | features M维的N个特征 62 | targets 关联目标Id的数组 63 | active_targets 场景中当前存在的目标列表 64 | """ 65 | for feature, target in zip(features, targets): 66 | self.samples.setdefault(target, []).append(feature) 67 | if self.budget is not None: 68 | self.samples[target] = self.samples[target][-self.budget:] 69 | self.samples = {k: self.samples[k] for k in active_targets} 70 | 71 | def distance(self, features, targets): 72 | """ 73 | 比较feature和targets之间的距离,返回一个代价矩阵 74 | 在匹配阶段,将distance封装为gated_metric,进行外观信息(reid得到的深度特征)+运动信息(马氏距离用于度量两个分布相似程度) 75 | 76 | """ 77 | cost_matrix = np.zeros((len(targets), len(features))) 78 | for i, target in enumerate(targets): 79 | cost_matrix[i, :] = self._metric(self.samples[target], features) 80 | return cost_matrix 81 | -------------------------------------------------------------------------------- /deepsort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 
2 | 3 | 4 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 5 | """ 6 | nms算法的Python实现 7 | :param boxes: ROI矩阵,格式为(x, y, w, h) 8 | :param max_bbox_overlap:覆盖高于该值被抑制 9 | :param scores:检测器置信度 10 | :return: 11 | Examples 12 | -------- 13 | 14 | >>> boxes = [d.roi for d in detections] 15 | >>> scores = [d.confidence for d in detections] 16 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 17 | >>> detections = [detections[i] for i in indices] 18 | """ 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | boxes = boxes.astype(np.float) 23 | pick = [] 24 | 25 | x1 = boxes[:, 0] 26 | y1 = boxes[:, 1] 27 | x2 = boxes[:, 2] + boxes[:, 0] 28 | y2 = boxes[:, 3] + boxes[:, 1] 29 | 30 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 31 | if scores is not None: 32 | idxs = np.argsort(scores) 33 | else: 34 | idxs = np.argsort(y2) 35 | 36 | while len(idxs) > 0: 37 | last = len(idxs) - 1 38 | i = idxs[last] 39 | pick.append(i) 40 | 41 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 42 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 43 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 44 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 45 | 46 | w = np.maximum(0, xx2 - xx1 + 1) 47 | h = np.maximum(0, yy2 - yy1 + 1) 48 | 49 | overlap = (w * h) / area[idxs[:last]] 50 | 51 | idxs = np.delete( 52 | idxs, np.concatenate( 53 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 54 | 55 | return pick 56 | -------------------------------------------------------------------------------- /deepsort/sort/track.py: -------------------------------------------------------------------------------- 1 | class TrackState: 2 | """ 3 | 轨迹状态 4 | """ 5 | Tentative = 1 6 | Confirmed = 2 7 | Deleted = 3 8 | 9 | 10 | class Track: 11 | """ 12 | 包含一个轨迹的所有信息 13 | """ 14 | 15 | def __init__(self, mean, covariance, track_id, n_init, max_age, 16 | feature=None): 17 | self.mean = mean 18 | self.covariance = covariance 19 | self.track_id = track_id 20 | self.hits = 1 # 命中次数 21 | self.time_since_update = 0 22 | 23 | self.state = TrackState.Tentative # 创建时的状态为Tentative 24 | self.features = [] 25 | if feature is not None: 26 | self.features.append(feature) 27 | 28 | self._n_init = n_init 29 | self._max_age = max_age 30 | 31 | def to_tlwh(self): 32 | """ 33 | 当前目标位置,格式转换 34 | Returns 35 | ------- 36 | 37 | """ 38 | ret = self.mean[:4].copy() 39 | ret[2] *= ret[3] 40 | ret[:2] -= ret[2:] / 2 41 | return ret 42 | 43 | def to_tlbr(self): 44 | ret = self.to_tlwh() 45 | ret[2:] = ret[:2] + ret[2:] 46 | return ret 47 | 48 | def predict(self, kf): 49 | """ 50 | 使用卡尔曼滤波进行状态预测 51 | """ 52 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 53 | self.time_since_update += 1 # 每次预测自增1 54 | 55 | def update(self, kf, detection): 56 | """ 57 | 进行相关矩阵和数据的更新 58 | """ 59 | self.mean, self.covariance = kf.update( 60 | self.mean, self.covariance, detection.to_xyah()) 61 | self.features.append(detection.feature) 62 | 63 | self.hits += 1 64 | self.time_since_update = 0 65 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 66 | self.state = TrackState.Confirmed 67 | 68 | def mark_missed(self): 69 | if self.state == TrackState.Tentative: 70 | self.state = TrackState.Deleted 71 | elif self.time_since_update > self._max_age: 72 | self.state = TrackState.Deleted 73 | 74 | def is_tentative(self): 75 | """ 76 | 该轨迹是否为tentative(临时存在) 77 | """ 78 | return self.state == TrackState.Tentative 79 | 80 | def is_confirmed(self): 81 | """ 82 | 该轨迹是否确认 83 | """ 84 | return self.state == TrackState.Confirmed 85 | 86 | def is_deleted(self): 87 | """ 
88 | 该轨迹是否删除 89 | """ 90 | return self.state == TrackState.Deleted 91 | -------------------------------------------------------------------------------- /deepsort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . import kalman_filter 3 | from . import linear_assignment 4 | from . import iou_matching 5 | from .track import Track 6 | 7 | 8 | class Tracker: 9 | """ 10 | 多目标跟踪器实现 11 | """ 12 | 13 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 14 | self.metric = metric 15 | self.max_iou_distance = max_iou_distance 16 | self.max_age = max_age 17 | self.n_init = n_init 18 | 19 | self.kf = kalman_filter.KalmanFilter() 20 | self.tracks = [] 21 | self._next_id = 1 22 | 23 | def predict(self): 24 | """ 25 | 状态预测 26 | """ 27 | for track in self.tracks: 28 | track.predict(self.kf) 29 | 30 | def update(self, detections): 31 | """ 32 | 状态更新 33 | """ 34 | # 级联匹配 35 | matches, unmatched_tracks, unmatched_detections = self._match(detections) 36 | 37 | for track_idx, detection_idx in matches: 38 | # 成功匹配的要用检测结果更新对于track的参数 39 | # 包括 40 | # 更新卡尔曼滤波一系列运动变量、命中次数以及重置time_since_update 41 | # 检测的深度特征保存到track的特征集中 42 | # 连续命中三帧,将track状态由tentative改为confirmed 43 | 44 | self.tracks[track_idx].update( 45 | self.kf, detections[detection_idx]) 46 | for track_idx in unmatched_tracks: 47 | # 未成功匹配的track 48 | # 若未经过confirm则删除 49 | # 若已经confirm但连续max_age帧未匹配到检测结果也删除 50 | self.tracks[track_idx].mark_missed() 51 | for detection_idx in unmatched_detections: 52 | # 未匹配的检测,为其创建新的track 53 | self._initiate_track(detections[detection_idx]) 54 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 55 | 56 | # Update distance metric. 57 | # 更新已经确认的track的特征集 58 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 59 | features, targets = [], [] 60 | for track in self.tracks: 61 | if not track.is_confirmed(): 62 | continue 63 | features += track.features 64 | targets += [track.track_id for _ in track.features] 65 | track.features = [] 66 | self.metric.partial_fit( 67 | np.asarray(features), np.asarray(targets), active_targets) 68 | 69 | def _match(self, detections): 70 | """ 71 | 跟踪结果和检测结果的匹配 72 | :param detections: 73 | :return: 74 | """ 75 | 76 | def gated_metric(tracks, dets, track_indices, detection_indices): 77 | features = np.array([dets[i].feature for i in detection_indices]) 78 | targets = np.array([tracks[i].track_id for i in track_indices]) 79 | # 通过最近邻计算余弦距离代价矩阵 80 | cost_matrix = self.metric.distance(features, targets) 81 | # 计算马氏距离,得到新的代价矩阵 82 | cost_matrix = linear_assignment.gate_cost_matrix( 83 | self.kf, cost_matrix, tracks, dets, track_indices, 84 | detection_indices) 85 | 86 | return cost_matrix 87 | 88 | # 将track分为确认track和未确认track 89 | confirmed_tracks = [ 90 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 91 | unconfirmed_tracks = [ 92 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 93 | 94 | # 将确认的track和检测结果进行级联匹配(使用外观特征) 95 | matches_a, unmatched_tracks_a, unmatched_detections = linear_assignment.matching_cascade( 96 | gated_metric, self.metric.matching_threshold, self.max_age, 97 | self.tracks, detections, confirmed_tracks) 98 | 99 | # 将上一步未成功匹配的track和未确认的track组合到一起形成iou_track_candidates于还没有匹配结果的检测结果进行IOU匹配 100 | iou_track_candidates = unconfirmed_tracks + [ 101 | k for k in unmatched_tracks_a if 102 | self.tracks[k].time_since_update == 1] 103 | unmatched_tracks_a = [ 104 | k for k in unmatched_tracks_a if 105 | self.tracks[k].time_since_update != 1] 
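        # 只有上一帧刚更新过(time_since_update == 1)的轨迹,其预测框才足够可靠,适合参与IOU匹配;间隔更久的轨迹直接留作未匹配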
106 | # 计算两两之间的iou,再通过1-iou得到cost matrix 107 | matches_b, unmatched_tracks_b, unmatched_detections = linear_assignment.min_cost_matching( 108 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 109 | detections, iou_track_candidates, unmatched_detections) 110 | 111 | matches = matches_a + matches_b # 组合获得当前所有匹配结果 112 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 113 | return matches, unmatched_tracks, unmatched_detections 114 | 115 | def _initiate_track(self, detection): 116 | """ 117 | 初始化新的跟踪器,对应新的检测结果 118 | :param detection: 119 | :return: 120 | """ 121 | # 初始化卡尔曼 122 | mean, covariance = self.kf.initiate(detection.to_xyah()) 123 | # 创建新的跟踪器 124 | self.tracks.append(Track( 125 | mean, covariance, self._next_id, self.n_init, self.max_age, 126 | detection.feature)) 127 | # id自增 128 | self._next_id += 1 129 | -------------------------------------------------------------------------------- /detector/FasterRCNN/tips.txt: -------------------------------------------------------------------------------- 1 | gitignore this folder -------------------------------------------------------------------------------- /detector/YOLO3/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("detector/YOLO3") 3 | 4 | 5 | from .detector import YOLOv3 6 | __all__ = ['YOLOv3'] 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .yolo_utils import convert2cpu 3 | import os 4 | 5 | 6 | def parse_cfg(cfgfile): 7 | blocks = [] 8 | fp = open(cfgfile, 'r') 9 | block = None 10 | line = fp.readline() 11 | while line != '': 12 | line = line.rstrip() 13 | if line == '' or line[0] == '#': 14 | line = fp.readline() 15 | continue 16 | elif line[0] == '[': 17 | if block: 18 | blocks.append(block) 19 | block = dict() 20 | block['type'] = line.lstrip('[').rstrip(']') 21 | # set default value 22 | if block['type'] == 'convolutional': 23 | block['batch_normalize'] = 0 24 | else: 25 | key, value = line.split('=') 26 | key = key.strip() 27 | if key == 'type': 28 | key = '_type' 29 | value = value.strip() 30 | block[key] = value 31 | line = fp.readline() 32 | 33 | if block: 34 | blocks.append(block) 35 | fp.close() 36 | return blocks 37 | 38 | 39 | def print_cfg(blocks): 40 | print('layer filters size input output'); 41 | prev_width = 416 42 | prev_height = 416 43 | prev_filters = 3 44 | out_filters = [] 45 | out_widths = [] 46 | out_heights = [] 47 | ind = -2 48 | for block in blocks: 49 | ind = ind + 1 50 | if block['type'] == 'net': 51 | prev_width = int(block['width']) 52 | prev_height = int(block['height']) 53 | continue 54 | elif block['type'] == 'convolutional': 55 | filters = int(block['filters']) 56 | kernel_size = int(block['size']) 57 | stride = int(block['stride']) 58 | is_pad = int(block['pad']) 59 | pad = (kernel_size - 1) // 2 if is_pad else 0 60 | width = (prev_width + 2 * pad - kernel_size) // stride + 1 61 | height = (prev_height + 2 * pad - kernel_size) // stride + 1 62 | print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 63 | ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, 64 | height, filters)) 65 | prev_width = width 66 | prev_height = height 67 | prev_filters = filters 68 | out_widths.append(prev_width) 69 | out_heights.append(prev_height) 70 | 
out_filters.append(prev_filters) 71 | elif block['type'] == 'maxpool': 72 | pool_size = int(block['size']) 73 | stride = int(block['stride']) 74 | width = prev_width // stride 75 | height = prev_height // stride 76 | print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 77 | ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, filters)) 78 | prev_width = width 79 | prev_height = height 80 | prev_filters = filters 81 | out_widths.append(prev_width) 82 | out_heights.append(prev_height) 83 | out_filters.append(prev_filters) 84 | elif block['type'] == 'avgpool': 85 | width = 1 86 | height = 1 87 | print('%5d %-6s %3d x %3d x%4d -> %3d' % ( 88 | ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) 89 | prev_width = width 90 | prev_height = height 91 | prev_filters = filters 92 | out_widths.append(prev_width) 93 | out_heights.append(prev_height) 94 | out_filters.append(prev_filters) 95 | elif block['type'] == 'softmax': 96 | print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) 97 | out_widths.append(prev_width) 98 | out_heights.append(prev_height) 99 | out_filters.append(prev_filters) 100 | elif block['type'] == 'cost': 101 | print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) 102 | out_widths.append(prev_width) 103 | out_heights.append(prev_height) 104 | out_filters.append(prev_filters) 105 | elif block['type'] == 'reorg': 106 | stride = int(block['stride']) 107 | filters = stride * stride * prev_filters 108 | width = prev_width // stride 109 | height = prev_height // stride 110 | print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 111 | ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) 112 | prev_width = width 113 | prev_height = height 114 | prev_filters = filters 115 | out_widths.append(prev_width) 116 | out_heights.append(prev_height) 117 | out_filters.append(prev_filters) 118 | elif block['type'] == 'upsample': 119 | stride = int(block['stride']) 120 | filters = prev_filters 121 | width = prev_width * stride 122 | height = prev_height * stride 123 | print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 124 | ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) 125 | prev_width = width 126 | prev_height = height 127 | prev_filters = filters 128 | out_widths.append(prev_width) 129 | out_heights.append(prev_height) 130 | out_filters.append(prev_filters) 131 | elif block['type'] == 'route': 132 | layers = block['layers'].split(',') 133 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 134 | if len(layers) == 1: 135 | print('%5d %-6s %d' % (ind, 'route', layers[0])) 136 | prev_width = out_widths[layers[0]] 137 | prev_height = out_heights[layers[0]] 138 | prev_filters = out_filters[layers[0]] 139 | elif len(layers) == 2: 140 | print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) 141 | prev_width = out_widths[layers[0]] 142 | prev_height = out_heights[layers[0]] 143 | assert (prev_width == out_widths[layers[1]]) 144 | assert (prev_height == out_heights[layers[1]]) 145 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 146 | out_widths.append(prev_width) 147 | out_heights.append(prev_height) 148 | out_filters.append(prev_filters) 149 | elif block['type'] in ['region', 'yolo']: 150 | print('%5d %-6s' % (ind, 'detection')) 151 | out_widths.append(prev_width) 152 | out_heights.append(prev_height) 153 | out_filters.append(prev_filters) 154 | elif block['type'] == 'shortcut': 155 | from_id = 
int(block['from']) 156 | from_id = from_id if from_id > 0 else from_id + ind 157 | print('%5d %-6s %d' % (ind, 'shortcut', from_id)) 158 | prev_width = out_widths[from_id] 159 | prev_height = out_heights[from_id] 160 | prev_filters = out_filters[from_id] 161 | out_widths.append(prev_width) 162 | out_heights.append(prev_height) 163 | out_filters.append(prev_filters) 164 | elif block['type'] == 'connected': 165 | filters = int(block['output']) 166 | print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) 167 | prev_filters = filters 168 | out_widths.append(1) 169 | out_heights.append(1) 170 | out_filters.append(prev_filters) 171 | else: 172 | print('unknown type %s' % (block['type'])) 173 | 174 | 175 | def load_conv(buf, start, conv_model): 176 | num_w = conv_model.weight.numel() 177 | num_b = conv_model.bias.numel() 178 | # print("start: {}, num_w: {}, num_b: {}".format(start, num_w, num_b)) 179 | # by ysyun, use .view_as() 180 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]).view_as(conv_model.bias.data)); 181 | start = start + num_b 182 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 183 | start = start + num_w 184 | return start 185 | 186 | 187 | def save_conv(fp, conv_model): 188 | if conv_model.bias.is_cuda: 189 | convert2cpu(conv_model.bias.data).numpy().tofile(fp) 190 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 191 | else: 192 | conv_model.bias.data.numpy().tofile(fp) 193 | conv_model.weight.data.numpy().tofile(fp) 194 | 195 | 196 | def load_conv_bn(buf, start, conv_model, bn_model): 197 | num_w = conv_model.weight.numel() 198 | num_b = bn_model.bias.numel() 199 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 200 | start = start + num_b 201 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])); 202 | start = start + num_b 203 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])); 204 | start = start + num_b 205 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])); 206 | start = start + num_b 207 | # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w 208 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 209 | start = start + num_w 210 | return start 211 | 212 | 213 | def save_conv_bn(fp, conv_model, bn_model): 214 | if bn_model.bias.is_cuda: 215 | convert2cpu(bn_model.bias.data).numpy().tofile(fp) 216 | convert2cpu(bn_model.weight.data).numpy().tofile(fp) 217 | convert2cpu(bn_model.running_mean).numpy().tofile(fp) 218 | convert2cpu(bn_model.running_var).numpy().tofile(fp) 219 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 220 | else: 221 | bn_model.bias.data.numpy().tofile(fp) 222 | bn_model.weight.data.numpy().tofile(fp) 223 | bn_model.running_mean.numpy().tofile(fp) 224 | bn_model.running_var.numpy().tofile(fp) 225 | conv_model.weight.data.numpy().tofile(fp) 226 | 227 | 228 | def load_fc(buf, start, fc_model): 229 | num_w = fc_model.weight.numel() 230 | num_b = fc_model.bias.numel() 231 | fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 232 | start = start + num_b 233 | fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])); 234 | start = start + num_w 235 | return start 236 | 237 | 238 | def save_fc(fp, fc_model): 239 | fc_model.bias.data.numpy().tofile(fp) 240 | fc_model.weight.data.numpy().tofile(fp) 241 | 242 | 243 | if __name__ == 
'__main__': 244 | import sys 245 | 246 | blocks = parse_cfg('cfg/yolo.cfg') 247 | if len(sys.argv) == 2: 248 | blocks = parse_cfg(sys.argv[1]) 249 | print_cfg(blocks) 250 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/coco.data: -------------------------------------------------------------------------------- 1 | train = coco_train.txt 2 | valid = coco_test.txt 3 | names = data/coco.names 4 | backup = backup 5 | gpus = 0,1,2,3 6 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/darknet19_448.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=448 5 | width=448 6 | max_crop=512 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.001 12 | policy=poly 13 | power=4 14 | max_batches=100000 15 | 16 | angle=7 17 | hue = .1 18 | saturation=.75 19 | exposure=.75 20 | aspect=.75 21 | 22 | [convolutional] 23 | batch_normalize=1 24 | filters=32 25 | size=3 26 | stride=1 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=64 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | batch_normalize=1 48 | filters=128 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=1 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | 
[convolutional] 111 | batch_normalize=1 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=512 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [convolutional] 127 | batch_normalize=1 128 | filters=256 129 | size=1 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | batch_normalize=1 136 | filters=512 137 | size=3 138 | stride=1 139 | pad=1 140 | activation=leaky 141 | 142 | [maxpool] 143 | size=2 144 | stride=2 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=1024 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=512 157 | size=1 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [convolutional] 163 | batch_normalize=1 164 | filters=1024 165 | size=3 166 | stride=1 167 | pad=1 168 | activation=leaky 169 | 170 | [convolutional] 171 | batch_normalize=1 172 | filters=512 173 | size=1 174 | stride=1 175 | pad=1 176 | activation=leaky 177 | 178 | [convolutional] 179 | batch_normalize=1 180 | filters=1024 181 | size=3 182 | stride=1 183 | pad=1 184 | activation=leaky 185 | 186 | [convolutional] 187 | filters=1000 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=linear 192 | 193 | [avgpool] 194 | 195 | [softmax] 196 | groups=1 197 | 198 | [cost] 199 | type=sse 200 | 201 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | 
softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Training 3 | # batch=64 4 | # subdivisions=2 5 | # Testing 6 | batch=1 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | 141 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc.data: -------------------------------------------------------------------------------- 1 | train = data/voc_train.txt 2 | valid = data/2007_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | gpus = 3 6 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc_gaotie.data: -------------------------------------------------------------------------------- 1 | train = 
data/gaotie_trainval.txt 2 | valid = data/gaotie_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | gpus = 3 -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 
206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | 
batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo_v3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=4 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=20,25 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | 
[shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 
| pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 
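# End of the Darknet-53-style feature-extraction backbone (the convolutional/shortcut blocks above);
# the detection head and its three [yolo] output branches follow below.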
549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | 
[convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 
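In each of the detection cfgs above, the 1x1 convolution feeding a [region] or [yolo] block must output exactly anchors_per_scale x (4 box coordinates + 1 objectness score + one score per class) channels, which is where the otherwise odd-looking `filters=255`, `filters=425` and `filters=125` values come from. A minimal sketch of that relationship (the helper name and the assertions are illustrative, not part of this repo):

```python
def detection_filters(anchors_per_scale, num_classes, num_coords=4):
    """Channels required by the conv layer feeding a [region]/[yolo] block:
    per anchor, 4 box coordinates + 1 objectness score + one score per class."""
    return anchors_per_scale * (num_coords + 1 + num_classes)

# yolo_v3.cfg / yolov3-tiny.cfg: 3 anchors per scale, 80 COCO classes
assert detection_filters(3, 80) == 255
# yolo.cfg / tiny-yolo.cfg ([region] head, COCO): 5 anchors, 80 classes
assert detection_filters(5, 80) == 425
# yolo-voc.cfg / tiny-yolo-voc.cfg ([region] head, VOC): 5 anchors, 20 classes
assert detection_filters(5, 20) == 125
```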
-------------------------------------------------------------------------------- /detector/YOLO3/darknet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from .cfg import * 7 | from .region_layer import RegionLayer 8 | from .yolo_layer import YoloLayer 9 | 10 | 11 | # from layers.batchnorm.bn import BN2d 12 | 13 | 14 | class MaxPoolStride1(nn.Module): 15 | def __init__(self): 16 | super(MaxPoolStride1, self).__init__() 17 | 18 | def forward(self, x): 19 | x = F.max_pool2d(F.pad(x, (0, 1, 0, 1), mode='replicate'), 2, stride=1) 20 | return x 21 | 22 | 23 | class Upsample(nn.Module): 24 | def __init__(self, stride=2): 25 | super(Upsample, self).__init__() 26 | self.stride = stride 27 | 28 | def forward(self, x): 29 | stride = self.stride 30 | assert (x.data.dim() == 4) 31 | B = x.data.size(0) 32 | C = x.data.size(1) 33 | H = x.data.size(2) 34 | W = x.data.size(3) 35 | ws = stride 36 | hs = stride 37 | x = x.view(B, C, H, 1, W, 1).expand(B, C, H, hs, W, ws).contiguous().view(B, C, H * hs, W * ws) 38 | return x 39 | 40 | 41 | class Reorg(nn.Module): 42 | def __init__(self, stride=2): 43 | super(Reorg, self).__init__() 44 | self.stride = stride 45 | 46 | def forward(self, x): 47 | stride = self.stride 48 | assert (x.data.dim() == 4) 49 | B = x.data.size(0) 50 | C = x.data.size(1) 51 | H = x.data.size(2) 52 | W = x.data.size(3) 53 | assert (H % stride == 0) 54 | assert (W % stride == 0) 55 | ws = stride 56 | hs = stride 57 | x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(3, 4).contiguous() 58 | x = x.view(B, C, (H // hs) * (W // ws), hs * ws).transpose(2, 3).contiguous() 59 | x = x.view(B, C, hs * ws, H // hs, W // ws).transpose(1, 2).contiguous() 60 | x = x.view(B, hs * ws * C, H // hs, W // ws) 61 | return x 62 | 63 | 64 | class GlobalAvgPool2d(nn.Module): 65 | def __init__(self): 66 | super(GlobalAvgPool2d, self).__init__() 67 | 68 | def forward(self, x): 69 | N = x.data.size(0) 70 | C = x.data.size(1) 71 | H = x.data.size(2) 72 | W = x.data.size(3) 73 | x = F.avg_pool2d(x, (H, W)) 74 | x = x.view(N, C) 75 | return x 76 | 77 | 78 | # for route and shortcut 79 | class EmptyModule(nn.Module): 80 | def __init__(self): 81 | super(EmptyModule, self).__init__() 82 | 83 | def forward(self, x): 84 | return x 85 | 86 | 87 | # support route shortcut and reorg 88 | 89 | class Darknet(nn.Module): 90 | def getLossLayers(self): 91 | loss_layers = [] 92 | for m in self.models: 93 | if isinstance(m, RegionLayer) or isinstance(m, YoloLayer): 94 | loss_layers.append(m) 95 | return loss_layers 96 | 97 | def __init__(self, cfgfile, use_cuda=True): 98 | super(Darknet, self).__init__() 99 | self.use_cuda = use_cuda 100 | self.blocks = parse_cfg(cfgfile) 101 | self.models = self.create_network(self.blocks) # merge conv, bn,leaky 102 | self.loss_layers = self.getLossLayers() 103 | 104 | # self.width = int(self.blocks[0]['width']) 105 | # self.height = int(self.blocks[0]['height']) 106 | 107 | if len(self.loss_layers) > 0: 108 | last = len(self.loss_layers) - 1 109 | self.anchors = self.loss_layers[last].anchors 110 | self.num_anchors = self.loss_layers[last].num_anchors 111 | self.anchor_step = self.loss_layers[last].anchor_step 112 | self.num_classes = self.loss_layers[last].num_classes 113 | 114 | # default format : major=0, minor=1 115 | self.header = torch.IntTensor([0, 1, 0, 0]) 116 | self.seen = 0 117 | 118 | def forward(self, x): 119 | ind = -2 120 | 
self.loss_layers = None 121 | outputs = dict() 122 | out_boxes = dict() 123 | outno = 0 124 | for block in self.blocks: 125 | ind = ind + 1 126 | 127 | if block['type'] == 'net': 128 | continue 129 | elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']: 130 | x = self.models[ind](x) 131 | outputs[ind] = x 132 | elif block['type'] == 'route': 133 | layers = block['layers'].split(',') 134 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 135 | if len(layers) == 1: 136 | x = outputs[layers[0]] 137 | elif len(layers) == 2: 138 | x1 = outputs[layers[0]] 139 | x2 = outputs[layers[1]] 140 | x = torch.cat((x1, x2), 1) 141 | outputs[ind] = x 142 | elif block['type'] == 'shortcut': 143 | from_layer = int(block['from']) 144 | activation = block['activation'] 145 | from_layer = from_layer if from_layer > 0 else from_layer + ind 146 | x1 = outputs[from_layer] 147 | x2 = outputs[ind - 1] 148 | x = x1 + x2 149 | if activation == 'leaky': 150 | x = F.leaky_relu(x, 0.1, inplace=True) 151 | elif activation == 'relu': 152 | x = F.relu(x, inplace=True) 153 | outputs[ind] = x 154 | elif block['type'] in ['region', 'yolo']: 155 | boxes = self.models[ind].get_mask_boxes(x) 156 | out_boxes[outno] = boxes 157 | outno += 1 158 | outputs[ind] = None 159 | elif block['type'] == 'cost': 160 | continue 161 | else: 162 | print('unknown type %s' % (block['type'])) 163 | return x if outno == 0 else out_boxes 164 | 165 | def print_network(self): 166 | print_cfg(self.blocks) 167 | 168 | def create_network(self, blocks): 169 | models = nn.ModuleList() 170 | 171 | prev_filters = 3 172 | out_filters = [] 173 | prev_stride = 1 174 | out_strides = [] 175 | conv_id = 0 176 | ind = -2 177 | for block in blocks: 178 | ind += 1 179 | if block['type'] == 'net': 180 | prev_filters = int(block['channels']) 181 | self.width = int(block['width']) 182 | self.height = int(block['height']) 183 | continue 184 | elif block['type'] == 'convolutional': 185 | conv_id = conv_id + 1 186 | batch_normalize = int(block['batch_normalize']) 187 | filters = int(block['filters']) 188 | kernel_size = int(block['size']) 189 | stride = int(block['stride']) 190 | is_pad = int(block['pad']) 191 | pad = (kernel_size - 1) // 2 if is_pad else 0 192 | activation = block['activation'] 193 | model = nn.Sequential() 194 | if batch_normalize: 195 | model.add_module('conv{0}'.format(conv_id), 196 | nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False)) 197 | model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters)) 198 | # model.add_module('bn{0}'.format(conv_id), BN2d(filters)) 199 | else: 200 | model.add_module('conv{0}'.format(conv_id), 201 | nn.Conv2d(prev_filters, filters, kernel_size, stride, pad)) 202 | if activation == 'leaky': 203 | model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True)) 204 | elif activation == 'relu': 205 | model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True)) 206 | prev_filters = filters 207 | out_filters.append(prev_filters) 208 | prev_stride = stride * prev_stride 209 | out_strides.append(prev_stride) 210 | models.append(model) 211 | elif block['type'] == 'maxpool': 212 | pool_size = int(block['size']) 213 | stride = int(block['stride']) 214 | if stride > 1: 215 | model = nn.MaxPool2d(pool_size, stride) 216 | else: 217 | model = MaxPoolStride1() 218 | out_filters.append(prev_filters) 219 | prev_stride = stride * prev_stride 220 | out_strides.append(prev_stride) 221 | models.append(model) 222 | elif 
block['type'] == 'avgpool': 223 | model = GlobalAvgPool2d() 224 | out_filters.append(prev_filters) 225 | models.append(model) 226 | elif block['type'] == 'softmax': 227 | model = nn.Softmax() 228 | out_strides.append(prev_stride) 229 | out_filters.append(prev_filters) 230 | models.append(model) 231 | elif block['type'] == 'cost': 232 | if block['_type'] == 'sse': 233 | model = nn.MSELoss(size_average=True) 234 | elif block['_type'] == 'L1': 235 | model = nn.L1Loss(size_average=True) 236 | elif block['_type'] == 'smooth': 237 | model = nn.SmoothL1Loss(size_average=True) 238 | out_filters.append(1) 239 | out_strides.append(prev_stride) 240 | models.append(model) 241 | elif block['type'] == 'reorg': 242 | stride = int(block['stride']) 243 | prev_filters = stride * stride * prev_filters 244 | out_filters.append(prev_filters) 245 | prev_stride = prev_stride * stride 246 | out_strides.append(prev_stride) 247 | models.append(Reorg(stride)) 248 | elif block['type'] == 'upsample': 249 | stride = int(block['stride']) 250 | out_filters.append(prev_filters) 251 | prev_stride = prev_stride / stride 252 | out_strides.append(prev_stride) 253 | # models.append(nn.Upsample(scale_factor=stride, mode='nearest')) 254 | models.append(Upsample(stride)) 255 | elif block['type'] == 'route': 256 | layers = block['layers'].split(',') 257 | ind = len(models) 258 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 259 | if len(layers) == 1: 260 | prev_filters = out_filters[layers[0]] 261 | prev_stride = out_strides[layers[0]] 262 | elif len(layers) == 2: 263 | assert (layers[0] == ind - 1) 264 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 265 | prev_stride = out_strides[layers[0]] 266 | out_filters.append(prev_filters) 267 | out_strides.append(prev_stride) 268 | models.append(EmptyModule()) 269 | elif block['type'] == 'shortcut': 270 | ind = len(models) 271 | prev_filters = out_filters[ind - 1] 272 | out_filters.append(prev_filters) 273 | prev_stride = out_strides[ind - 1] 274 | out_strides.append(prev_stride) 275 | models.append(EmptyModule()) 276 | elif block['type'] == 'connected': 277 | filters = int(block['output']) 278 | if block['activation'] == 'linear': 279 | model = nn.Linear(prev_filters, filters) 280 | elif block['activation'] == 'leaky': 281 | model = nn.Sequential( 282 | nn.Linear(prev_filters, filters), 283 | nn.LeakyReLU(0.1, inplace=True)) 284 | elif block['activation'] == 'relu': 285 | model = nn.Sequential( 286 | nn.Linear(prev_filters, filters), 287 | nn.ReLU(inplace=True)) 288 | prev_filters = filters 289 | out_filters.append(prev_filters) 290 | out_strides.append(prev_stride) 291 | models.append(model) 292 | elif block['type'] == 'region': 293 | region_layer = RegionLayer(use_cuda=self.use_cuda) 294 | anchors = block['anchors'].split(',') 295 | region_layer.anchors = [float(i) for i in anchors] 296 | region_layer.num_classes = int(block['classes']) 297 | region_layer.num_anchors = int(block['num']) 298 | region_layer.anchor_step = len(region_layer.anchors) // region_layer.num_anchors 299 | region_layer.rescore = int(block['rescore']) 300 | region_layer.object_scale = float(block['object_scale']) 301 | region_layer.noobject_scale = float(block['noobject_scale']) 302 | region_layer.class_scale = float(block['class_scale']) 303 | region_layer.coord_scale = float(block['coord_scale']) 304 | region_layer.thresh = float(block['thresh']) 305 | out_filters.append(prev_filters) 306 | out_strides.append(prev_stride) 307 | models.append(region_layer) 308 | elif 
block['type'] == 'yolo': 309 | yolo_layer = YoloLayer(use_cuda=self.use_cuda) 310 | anchors = block['anchors'].split(',') 311 | anchor_mask = block['mask'].split(',') 312 | yolo_layer.anchor_mask = [int(i) for i in anchor_mask] 313 | yolo_layer.anchors = [float(i) for i in anchors] 314 | yolo_layer.num_classes = int(block['classes']) 315 | yolo_layer.num_anchors = int(block['num']) 316 | yolo_layer.anchor_step = len(yolo_layer.anchors) // yolo_layer.num_anchors 317 | try: 318 | yolo_layer.rescore = int(block['rescore']) 319 | except: 320 | pass 321 | yolo_layer.ignore_thresh = float(block['ignore_thresh']) 322 | yolo_layer.truth_thresh = float(block['truth_thresh']) 323 | yolo_layer.stride = prev_stride 324 | yolo_layer.nth_layer = ind 325 | yolo_layer.net_width = self.width 326 | yolo_layer.net_height = self.height 327 | out_filters.append(prev_filters) 328 | out_strides.append(prev_stride) 329 | models.append(yolo_layer) 330 | else: 331 | print('unknown type %s' % (block['type'])) 332 | 333 | return models 334 | 335 | def load_binfile(self, weightfile): 336 | fp = open(weightfile, 'rb') 337 | 338 | version = np.fromfile(fp, count=3, dtype=np.int32) 339 | version = [int(i) for i in version] 340 | if version[0] * 10 + version[1] >= 2 and version[0] < 1000 and version[1] < 1000: 341 | seen = np.fromfile(fp, count=1, dtype=np.int64) 342 | else: 343 | seen = np.fromfile(fp, count=1, dtype=np.int32) 344 | self.header = torch.from_numpy(np.concatenate((version, seen), axis=0)) 345 | self.seen = int(seen) 346 | body = np.fromfile(fp, dtype=np.float32) 347 | fp.close() 348 | return body 349 | 350 | def load_weights(self, weightfile): 351 | buf = self.load_binfile(weightfile) 352 | 353 | start = 0 354 | ind = -2 355 | for block in self.blocks: 356 | if start >= buf.size: 357 | break 358 | ind = ind + 1 359 | if block['type'] == 'net': 360 | continue 361 | elif block['type'] == 'convolutional': 362 | model = self.models[ind] 363 | batch_normalize = int(block['batch_normalize']) 364 | if batch_normalize: 365 | start = load_conv_bn(buf, start, model[0], model[1]) 366 | else: 367 | start = load_conv(buf, start, model[0]) 368 | elif block['type'] == 'connected': 369 | model = self.models[ind] 370 | if block['activation'] != 'linear': 371 | start = load_fc(buf, start, model[0]) 372 | else: 373 | start = load_fc(buf, start, model) 374 | elif block['type'] == 'maxpool': 375 | pass 376 | elif block['type'] == 'reorg': 377 | pass 378 | elif block['type'] == 'upsample': 379 | pass 380 | elif block['type'] == 'route': 381 | pass 382 | elif block['type'] == 'shortcut': 383 | pass 384 | elif block['type'] == 'region': 385 | pass 386 | elif block['type'] == 'yolo': 387 | pass 388 | elif block['type'] == 'avgpool': 389 | pass 390 | elif block['type'] == 'softmax': 391 | pass 392 | elif block['type'] == 'cost': 393 | pass 394 | else: 395 | print('unknown type %s' % (block['type'])) 396 | 397 | def save_weights(self, outfile, cutoff=0): 398 | if cutoff <= 0: 399 | cutoff = len(self.blocks) - 1 400 | 401 | fp = open(outfile, 'wb') 402 | self.header[3] = self.seen 403 | header = np.array(self.header[0:3].numpy(), np.int32) 404 | header.tofile(fp) 405 | if (self.header[0] * 10 + self.header[1]) >= 2: 406 | seen = np.array(self.seen, np.int64) 407 | else: 408 | seen = np.array(self.seen, np.int32) 409 | seen.tofile(fp) 410 | 411 | ind = -1 412 | for blockId in range(1, cutoff + 1): 413 | ind = ind + 1 414 | block = self.blocks[blockId] 415 | if block['type'] == 'convolutional': 416 | model = self.models[ind] 417 | 
batch_normalize = int(block['batch_normalize']) 418 | if batch_normalize: 419 | save_conv_bn(fp, model[0], model[1]) 420 | else: 421 | save_conv(fp, model[0]) 422 | elif block['type'] == 'connected': 423 | model = self.models[ind] 424 | if block['activation'] != 'linear': 425 | save_fc(fp, model[0]) 426 | else: 427 | save_fc(fp, model) 428 | elif block['type'] == 'maxpool': 429 | pass 430 | elif block['type'] == 'reorg': 431 | pass 432 | elif block['type'] == 'upsample': 433 | pass 434 | elif block['type'] == 'route': 435 | pass 436 | elif block['type'] == 'shortcut': 437 | pass 438 | elif block['type'] == 'region': 439 | pass 440 | elif block['type'] == 'yolo': 441 | pass 442 | elif block['type'] == 'avgpool': 443 | pass 444 | elif block['type'] == 'softmax': 445 | pass 446 | elif block['type'] == 'cost': 447 | pass 448 | else: 449 | print('unknown type %s' % (block['type'])) 450 | fp.close() 451 | -------------------------------------------------------------------------------- /detector/YOLO3/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | from PIL import Image, ImageDraw 4 | from yolo_utils import * 5 | from darknet import Darknet 6 | 7 | import cv2 8 | 9 | namesfile = None 10 | 11 | 12 | def detect(cfgfile, weightfile, imgfolder): 13 | m = Darknet(cfgfile) 14 | m.load_weights(weightfile) 15 | print('Loaded weights from %s.' % (weightfile)) 16 | 17 | # if m.num_classes == 20: 18 | # namesfile = 'data/voc.names' 19 | # elif m.num_classes == 80: 20 | # namesfile = 'data/coco.names' 21 | # else: 22 | # namesfile = 'data/names' 23 | 24 | use_cuda = True 25 | if use_cuda: 26 | m.cuda() 27 | 28 | imgfiles = [x for x in os.listdir(imgfolder) if x[-4:] == '.jpg'] 29 | imgfiles.sort() 30 | for imgname in imgfiles: 31 | imgfile = os.path.join(imgfolder, imgname) 32 | 33 | img = Image.open(imgfile).convert('RGB') 34 | sized = img.resize((m.width, m.height)) 35 | 36 | # for i in range(2): 37 | start = time.time() 38 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 39 | finish = time.time() 40 | # if i == 1: 41 | print('%s: Predicted in %f seconds.' % (imgfile, (finish - start))) 42 | 43 | class_names = load_class_names(namesfile) 44 | img = plot_boxes(img, boxes, 'result/{}'.format(os.path.basename(imgfile)), class_names) 45 | img = np.array(img) 46 | cv2.imshow('{}'.format(os.path.basename(imgfolder)), img) 47 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000, 800) 48 | cv2.waitKey(1000) 49 | 50 | 51 | def detect_cv2(cfgfile, weightfile, imgfile): 52 | import cv2 53 | m = Darknet(cfgfile) 54 | 55 | m.print_network() 56 | m.load_weights(weightfile) 57 | print('Loaded weights from %s.' % (weightfile)) 58 | 59 | if m.num_classes == 20: 60 | namesfile = 'data/voc.names' 61 | elif m.num_classes == 80: 62 | namesfile = 'data/coco.names' 63 | else: 64 | namesfile = 'data/names' 65 | 66 | use_cuda = True 67 | if use_cuda: 68 | m.cuda() 69 | 70 | img = cv2.imread(imgfile) 71 | sized = cv2.resize(img, (m.width, m.height)) 72 | sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB) 73 | 74 | for i in range(2): 75 | start = time.time() 76 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 77 | finish = time.time() 78 | if i == 1: 79 | print('%s: Predicted in %f seconds.'
% (imgfile, (finish - start))) 80 | 81 | class_names = load_class_names(namesfile) 82 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 83 | 84 | 85 | def detect_skimage(cfgfile, weightfile, imgfile): 86 | from skimage import io 87 | from skimage.transform import resize 88 | m = Darknet(cfgfile) 89 | 90 | m.print_network() 91 | m.load_weights(weightfile) 92 | print('Loading weights from %s... Done!' % (weightfile)) 93 | 94 | if m.num_classes == 20: 95 | namesfile = 'data/voc.names' 96 | elif m.num_classes == 80: 97 | namesfile = 'data/coco.names' 98 | else: 99 | namesfile = 'data/names' 100 | 101 | use_cuda = True 102 | if use_cuda: 103 | m.cuda() 104 | 105 | img = io.imread(imgfile) 106 | sized = resize(img, (m.width, m.height)) * 255 107 | 108 | for i in range(2): 109 | start = time.time() 110 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 111 | finish = time.time() 112 | if i == 1: 113 | print('%s: Predicted in %f seconds.' % (imgfile, (finish - start))) 114 | 115 | class_names = load_class_names(namesfile) 116 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 117 | 118 | 119 | if __name__ == '__main__': 120 | if len(sys.argv) == 5: 121 | cfgfile = sys.argv[1] 122 | weightfile = sys.argv[2] 123 | imgfolder = sys.argv[3] 124 | cv2.namedWindow('{}'.format(os.path.basename(imgfolder)), cv2.WINDOW_NORMAL) 125 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000, 800) 126 | globals()["namesfile"] = sys.argv[4] 127 | detect(cfgfile, weightfile, imgfolder) 128 | # detect_cv2(cfgfile, weightfile, imgfile) 129 | # detect_skimage(cfgfile, weightfile, imgfile) 130 | else: 131 | print('Usage: ') 132 | print(' python detect.py cfgfile weightfile imgfolder names') 133 | # detect('cfg/tiny-yolo-voc.cfg', 'tiny-yolo-voc.weights', 'data/person.jpg', version=1) 134 | -------------------------------------------------------------------------------- /detector/YOLO3/detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | from .darknet import Darknet 6 | from .yolo_utils import get_all_boxes, nms, post_process, xywh_to_xyxy, xyxy_to_xywh 7 | 8 | 9 | class YOLOv3(object): 10 | def __init__(self, cfgfile, weightfile, namesfile, score_thresh=0.7, conf_thresh=0.01, nms_thresh=0.45, is_xywh=False, use_cuda=True): 11 | # net definition 12 | self.net = Darknet(cfgfile) 13 | self.net.load_weights(weightfile) 14 | print('Loaded weights from %s.' % (weightfile)) 15 | self.device = "cuda" if use_cuda else "cpu" 16 | self.net.eval() 17 | self.net.to(self.device) 18 | 19 | # constants 20 | self.size = self.net.width, self.net.height 21 | self.score_thresh = score_thresh 22 | self.conf_thresh = conf_thresh 23 | self.nms_thresh = nms_thresh 24 | self.use_cuda = use_cuda 25 | self.is_xywh = is_xywh 26 | self.num_classes = self.net.num_classes 27 | self.class_names = self.load_class_names(namesfile) 28 | 29 | def __call__(self, ori_img): 30 | # img to tensor 31 | assert isinstance(ori_img, np.ndarray), "input must be a numpy array!" 32 | img = ori_img.astype(np.float)/255. 
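# Preprocessing: pixel values are scaled to [0, 1], the image is resized to the network
# input size (self.size), reordered HWC -> CHW and given a batch dimension before the
# forward pass. Note that np.float above is a deprecated alias of the builtin float
# (removed in NumPy 1.24), so newer environments may need np.float32 here.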
33 | 34 | img = cv2.resize(img, self.size) 35 | img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0) 36 | 37 | # forward 38 | with torch.no_grad(): 39 | img = img.to(self.device) 40 | out_boxes = self.net(img) 41 | boxes = get_all_boxes(out_boxes, self.conf_thresh, self.num_classes, use_cuda=self.use_cuda) #batch size is 1 42 | # boxes = nms(boxes, self.nms_thresh) 43 | # nms嵌入到下面的处理函数中 44 | boxes = post_process(boxes, self.net.num_classes, self.conf_thresh, self.nms_thresh)[0].cpu() 45 | boxes = boxes[boxes[:, -2]> self.score_thresh, :] # bbox xmin ymin xmax ymax 46 | 47 | if len(boxes) == 0: 48 | return None, None, None 49 | 50 | height, width = ori_img.shape[:2] 51 | bbox = boxes[:,:4] 52 | if self.is_xywh: 53 | # bbox x y w h 54 | bbox = xyxy_to_xywh(bbox) 55 | 56 | bbox = bbox * torch.FloatTensor([[width, height, width, height]]) 57 | cls_conf = boxes[:,5] 58 | cls_ids = boxes[:,6].long() 59 | return bbox.numpy(), cls_conf.numpy(), cls_ids.numpy() 60 | 61 | def load_class_names(self,namesfile): 62 | with open(namesfile, 'r', encoding='utf8') as fp: 63 | class_names = [line.strip() for line in fp.readlines()] 64 | return class_names 65 | -------------------------------------------------------------------------------- /detector/YOLO3/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms import boxes_nms -------------------------------------------------------------------------------- /detector/YOLO3/nms/nms.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torchvision 3 | 4 | try: 5 | import torch 6 | import torch_extension 7 | 8 | _nms = torch_extension.nms 9 | except ImportError: 10 | if torchvision.__version__ >= '0.3.0': 11 | _nms = torchvision.ops.nms 12 | else: 13 | from .python_nms import python_nms 14 | 15 | _nms = python_nms 16 | warnings.warn('You are using python version NMS, which is very very slow. Try compile c++ NMS ' 17 | 'using `cd ext & python build.py build_ext develop`') 18 | 19 | 20 | def boxes_nms(boxes, scores, nms_thresh, max_count=-1): 21 | """ Performs non-maximum suppression, run on GPU or CPU according to 22 | boxes's device. 23 | Args: 24 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4) 25 | scores(Tensor): scores, shape is (n, ) 26 | nms_thresh(float): thresh 27 | max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression 28 | Returns: 29 | indices kept. 30 | """ 31 | keep = _nms(boxes, scores, nms_thresh) 32 | if max_count > 0: 33 | keep = keep[:max_count] 34 | return keep 35 | -------------------------------------------------------------------------------- /detector/YOLO3/nms/python_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def python_nms(boxes, scores, nms_thresh): 6 | if boxes.numel() == 0: 7 | return torch.empty((0,), dtype=torch.long) 8 | # Use numpy to run nms. Running nms in PyTorch code on CPU is really slow. 
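# Greedy NMS: candidates are visited in descending score order, and every remaining box
# whose IoU with an already-kept box reaches nms_thresh is flagged in the boolean
# `suppressed` mask; only the unsuppressed indices are returned, on the original device.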
9 | origin_device = boxes.device 10 | cpu_device = torch.device('cpu') 11 | boxes = boxes.to(cpu_device).numpy() 12 | scores = scores.to(cpu_device).numpy() 13 | 14 | x1 = boxes[:, 0] 15 | y1 = boxes[:, 1] 16 | x2 = boxes[:, 2] 17 | y2 = boxes[:, 3] 18 | areas = (x2 - x1) * (y2 - y1) 19 | order = np.argsort(scores)[::-1] 20 | num_detections = boxes.shape[0] 21 | suppressed = np.zeros((num_detections,), dtype=np.bool) 22 | for _i in range(num_detections): 23 | i = order[_i] 24 | if suppressed[i]: 25 | continue 26 | ix1 = x1[i] 27 | iy1 = y1[i] 28 | ix2 = x2[i] 29 | iy2 = y2[i] 30 | iarea = areas[i] 31 | 32 | for _j in range(_i + 1, num_detections): 33 | j = order[_j] 34 | if suppressed[j]: 35 | continue 36 | 37 | xx1 = max(ix1, x1[j]) 38 | yy1 = max(iy1, y1[j]) 39 | xx2 = min(ix2, x2[j]) 40 | yy2 = min(iy2, y2[j]) 41 | w = max(0, xx2 - xx1) 42 | h = max(0, yy2 - yy1) 43 | 44 | inter = w * h 45 | ovr = inter / (iarea + areas[j] - inter) 46 | if ovr >= nms_thresh: 47 | suppressed[j] = True 48 | keep = np.nonzero(suppressed == 0)[0] 49 | keep = torch.from_numpy(keep).to(origin_device) 50 | return keep 51 | -------------------------------------------------------------------------------- /detector/YOLO3/region_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import sys 4 | import time 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 9 | 10 | class RegionLayer(nn.Module): 11 | def __init__(self, num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 12 | super(RegionLayer, self).__init__() 13 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 14 | self.device = torch.device("cuda" if use_cuda else "cpu") 15 | self.num_classes = num_classes 16 | self.num_anchors = num_anchors 17 | self.anchor_step = len(anchors)//num_anchors 18 | #self.anchors = torch.stack(torch.FloatTensor(anchors).split(self.anchor_step)).to(self.device) 19 | self.anchors = torch.FloatTensor(anchors).view(self.num_anchors, self.anchor_step).to(self.device) 20 | self.rescore = 1 21 | self.coord_scale = 1 22 | self.noobject_scale = 1 23 | self.object_scale = 5 24 | self.class_scale = 1 25 | self.thresh = 0.6 26 | self.seen = 0 27 | 28 | def build_targets(self, pred_boxes, target, nH, nW): 29 | nB = target.size(0) 30 | nA = self.num_anchors 31 | conf_mask = torch.ones (nB, nA, nH, nW) * self.noobject_scale 32 | coord_mask = torch.zeros(nB, nA, nH, nW) 33 | cls_mask = torch.zeros(nB, nA, nH, nW) 34 | tcoord = torch.zeros( 4, nB, nA, nH, nW) 35 | tconf = torch.zeros(nB, nA, nH, nW) 36 | tcls = torch.zeros(nB, nA, nH, nW) 37 | 38 | nAnchors = nA*nH*nW 39 | nPixels = nH*nW 40 | nGT = 0 # number of ground truth 41 | nRecall = 0 42 | # it works faster on CPU than on GPU. 
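        # Annotation (not in the original source): build_targets assigns every ground-truth box
        # to the grid cell holding its centre and to the anchor with the best shape IoU, then
        # fills the per-cell masks and encoded regression targets:
        #     tx = gx - gi,  ty = gy - gj,  tw = log(gw / anchor_w),  th = log(gh / anchor_h)
        # Cells whose prediction already overlaps some ground truth above `thresh` get
        # conf_mask = 0 and are excluded from the no-object confidence loss.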
43 | anchors = self.anchors.to("cpu") 44 | 45 | if self.seen < 12800: 46 | tcoord[0].fill_(0.5) 47 | tcoord[1].fill_(0.5) 48 | coord_mask.fill_(1) 49 | 50 | for b in range(nB): 51 | cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t() 52 | cur_ious = torch.zeros(nAnchors) 53 | tbox = target[b].view(-1,5).to("cpu") 54 | for t in range(50): 55 | if tbox[t][1] == 0: 56 | break 57 | gx, gw = [ i * nW for i in (tbox[t][1], tbox[t][3]) ] 58 | gy, gh = [ i * nH for i in (tbox[t][2], tbox[t][4]) ] 59 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors,1).t() 60 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 61 | ignore_ix = cur_ious>self.thresh 62 | conf_mask[b][ignore_ix.view(nA,nH,nW)] = 0 63 | 64 | for t in range(50): 65 | if tbox[t][1] == 0: 66 | break 67 | nGT += 1 68 | gx, gw = [ i * nW for i in (tbox[t][1], tbox[t][3]) ] 69 | gy, gh = [ i * nH for i in (tbox[t][2], tbox[t][4]) ] 70 | gw, gh = gw.float(), gh.float() 71 | gi, gj = int(gx), int(gy) 72 | 73 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA,1).t() 74 | anchor_boxes = torch.cat((torch.zeros(nA, 2), anchors),1).t() 75 | tmp_ious = multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False) 76 | best_iou, best_n = torch.max(tmp_ious, 0) 77 | 78 | if self.anchor_step == 4: # this part is not tested. 79 | tmp_ious_mask = (tmp_ious==best_iou) 80 | if tmp_ious_mask.sum() > 0: 81 | gt_pos = torch.FloatTensor([gi, gj, gx, gy]).repeat(nA,1).t() 82 | an_pos = anchor_boxes[4:6] # anchor_boxes are consisted of [0 0 aw ah ax ay] 83 | dist = pow(((gt_pos[0]+an_pos[0])-gt_pos[2]),2) + pow(((gt_pos[1]+an_pos[1])-gt_pos[3]),2) 84 | dist[1-tmp_ious_mask]=10000 # set the large number for the small ious 85 | _, best_n = torch.min(dist,0) 86 | 87 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 88 | pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi] 89 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 90 | 91 | coord_mask[b][best_n][gj][gi] = 1 92 | cls_mask [b][best_n][gj][gi] = 1 93 | conf_mask [b][best_n][gj][gi] = self.object_scale 94 | tcoord [0][b][best_n][gj][gi] = gx - gi 95 | tcoord [1][b][best_n][gj][gi] = gy - gj 96 | tcoord [2][b][best_n][gj][gi] = math.log(gw/anchors[best_n][0]) 97 | tcoord [3][b][best_n][gj][gi] = math.log(gh/anchors[best_n][1]) 98 | tcls [b][best_n][gj][gi] = tbox[t][0] 99 | tconf [b][best_n][gj][gi] = iou if self.rescore else 1. 
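                # Annotation (not in the original source): with rescore enabled (the default
                # here) the confidence target is the live IoU between the matched prediction
                # and the ground-truth box, i.e. YOLOv2's "rescore" behaviour; otherwise a
                # fixed target of 1.0 is used. nRecall below counts assignments with IoU > 0.5.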
100 | if iou > 0.5: 101 | nRecall += 1 102 | 103 | return nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 104 | 105 | def get_mask_boxes(self, output): 106 | if not isinstance(self.anchors, torch.Tensor): 107 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 108 | masked_anchors = self.anchors.view(-1) 109 | num_anchors = torch.IntTensor([self.num_anchors]).to(self.device) 110 | return {'x':output, 'a':masked_anchors, 'n':num_anchors} 111 | 112 | def forward(self, output, target): 113 | #output : BxAs*(4+1+num_classes)*H*W 114 | t0 = time.time() 115 | nB = output.data.size(0) # batch size 116 | nA = self.num_anchors 117 | nC = self.num_classes 118 | nH = output.data.size(2) 119 | nW = output.data.size(3) 120 | cls_anchor_dim = nB*nA*nH*nW 121 | 122 | if not isinstance(self.anchors, torch.Tensor): 123 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 124 | 125 | output = output.view(nB, nA, (5+nC), nH, nW) 126 | cls_grid = torch.linspace(5,5+nC-1,nC).long().to(self.device) 127 | ix = torch.LongTensor(range(0,5)).to(self.device) 128 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 129 | 130 | coord = output.index_select(2, ix[0:4]).view(nB*nA, -1, nH*nW).transpose(0,1).contiguous().view(-1,cls_anchor_dim) # x, y, w, h 131 | coord[0:2] = coord[0:2].sigmoid() # x, y 132 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 133 | cls = output.index_select(2, cls_grid) 134 | cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(cls_anchor_dim, nC) 135 | 136 | t1 = time.time() 137 | grid_x = torch.linspace(0, nW-1, nW).repeat(nB*nA, nH, 1).view(cls_anchor_dim).to(self.device) 138 | grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(cls_anchor_dim).to(self.device) 139 | anchor_w = self.anchors.index_select(1, ix[0]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 140 | anchor_h = self.anchors.index_select(1, ix[1]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 141 | 142 | pred_boxes[0] = coord[0] + grid_x 143 | pred_boxes[1] = coord[1] + grid_y 144 | pred_boxes[2] = coord[2].exp() * anchor_w 145 | pred_boxes[3] = coord[3].exp() * anchor_h 146 | # for build_targets. it works faster on CPU than on GPU 147 | pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)).detach() 148 | 149 | t2 = time.time() 150 | nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 151 | self.build_targets(pred_boxes, target.detach(), nH, nW) 152 | 153 | cls_mask = (cls_mask == 1) 154 | tcls = tcls[cls_mask].long().view(-1) 155 | cls_mask = cls_mask.view(-1, 1).repeat(1,nC).to(self.device) 156 | cls = cls[cls_mask].view(-1, nC) 157 | 158 | nProposals = int((conf > 0.25).sum()) 159 | 160 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 161 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 162 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device) 163 | 164 | t3 = time.time() 165 | loss_coord = self.coord_scale * nn.MSELoss(size_average=False)(coord*coord_mask, tcoord*coord_mask)/2 166 | # sqrt(object_scale)/2 is almost equal to 1. 
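        # Annotation (not in the original source): conf_mask holds noobject_scale (=1) for
        # background cells and object_scale (=5) for matched cells, and was passed through
        # .sqrt() above; since MSELoss squares its inputs, each cell's squared confidence error
        # is effectively re-weighted by the original scale (5 vs 1). The remark above simply
        # notes that sqrt(object_scale)/2 = sqrt(5)/2 ≈ 1.12, i.e. close to 1. Also note that
        # `size_average=False` is deprecated in recent PyTorch; `reduction='sum'` is equivalent.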
167 | loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2 168 | loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 169 | loss = loss_coord + loss_conf + loss_cls 170 | t4 = time.time() 171 | if False: 172 | print('-'*30) 173 | print(' activation : %f' % (t1 - t0)) 174 | print(' create pred_boxes : %f' % (t2 - t1)) 175 | print(' build targets : %f' % (t3 - t2)) 176 | print(' create loss : %f' % (t4 - t3)) 177 | print(' total : %f' % (t4 - t0)) 178 | print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 179 | % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss)) 180 | if math.isnan(loss.item()): 181 | print(conf, tconf) 182 | sys.exit(0) 183 | return loss 184 | -------------------------------------------------------------------------------- /detector/YOLO3/weight/tips.txt: -------------------------------------------------------------------------------- 1 | download yolo3 weights to this folder from official website -------------------------------------------------------------------------------- /detector/YOLO3/yolo_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import sys 4 | import time 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 9 | 10 | class YoloLayer(nn.Module): 11 | def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 12 | super(YoloLayer, self).__init__() 13 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 14 | self.device = torch.device("cuda" if use_cuda else "cpu") 15 | 16 | self.anchor_mask = anchor_mask 17 | self.num_classes = num_classes 18 | self.anchors = anchors 19 | self.num_anchors = num_anchors 20 | self.anchor_step = len(anchors)//num_anchors 21 | self.rescore = 0 22 | self.ignore_thresh = 0.5 23 | self.truth_thresh = 1. 24 | self.stride = 32 25 | self.nth_layer = 0 26 | self.seen = 0 27 | self.net_width = 0 28 | self.net_height = 0 29 | 30 | def get_mask_boxes(self, output): 31 | masked_anchors = [] 32 | for m in self.anchor_mask: 33 | masked_anchors += self.anchors[m*self.anchor_step:(m+1)*self.anchor_step] 34 | masked_anchors = [anchor/self.stride for anchor in masked_anchors] 35 | 36 | masked_anchors = torch.FloatTensor(masked_anchors).to(self.device) 37 | num_anchors = torch.IntTensor([len(self.anchor_mask)]).to(self.device) 38 | return {'x':output, 'a':masked_anchors, 'n':num_anchors} 39 | 40 | def build_targets(self, pred_boxes, target, anchors, nA, nH, nW): 41 | nB = target.size(0) 42 | anchor_step = anchors.size(1) # anchors[nA][anchor_step] 43 | conf_mask = torch.ones (nB, nA, nH, nW) 44 | coord_mask = torch.zeros(nB, nA, nH, nW) 45 | cls_mask = torch.zeros(nB, nA, nH, nW) 46 | tcoord = torch.zeros( 4, nB, nA, nH, nW) 47 | tconf = torch.zeros(nB, nA, nH, nW) 48 | tcls = torch.zeros(nB, nA, nH, nW) 49 | twidth, theight = self.net_width/self.stride, self.net_height/self.stride 50 | 51 | nAnchors = nA*nH*nW 52 | nPixels = nH*nW 53 | nGT = 0 54 | nRecall = 0 55 | nRecall75 = 0 56 | 57 | # it works faster on CPU than on GPU. 
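        # Annotation (not in the original source): this mirrors RegionLayer.build_targets but
        # for a single YOLOv3 detection scale — only the anchors selected by anchor_mask
        # (already divided by the layer stride in get_mask_boxes) are considered, predictions
        # overlapping any ground truth above ignore_thresh are dropped from the no-object
        # loss, and recall is tracked at both 0.5 and 0.75 IoU.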
58 | anchors = anchors.to("cpu") 59 | 60 | for b in range(nB): 61 | cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t() 62 | cur_ious = torch.zeros(nAnchors) 63 | tbox = target[b].view(-1,5).to("cpu") 64 | for t in range(50): 65 | if tbox[t][1] == 0: 66 | break 67 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 68 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 69 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors,1).t() 70 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 71 | ignore_ix = cur_ious>self.ignore_thresh 72 | conf_mask[b][ignore_ix.view(nA,nH,nW)] = 0 73 | 74 | for t in range(50): 75 | if tbox[t][1] == 0: 76 | break 77 | nGT += 1 78 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 79 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 80 | gw, gh = gw.float(), gh.float() 81 | gi, gj = int(gx), int(gy) 82 | 83 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA,1).t() 84 | anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors),1).t() 85 | _, best_n = torch.max(multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False), 0) 86 | 87 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 88 | pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi] 89 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 90 | 91 | coord_mask[b][best_n][gj][gi] = 1 92 | cls_mask [b][best_n][gj][gi] = 1 93 | conf_mask [b][best_n][gj][gi] = 1 94 | tcoord [0][b][best_n][gj][gi] = gx - gi 95 | tcoord [1][b][best_n][gj][gi] = gy - gj 96 | tcoord [2][b][best_n][gj][gi] = math.log(gw/anchors[best_n][0]) 97 | tcoord [3][b][best_n][gj][gi] = math.log(gh/anchors[best_n][1]) 98 | tcls [b][best_n][gj][gi] = tbox[t][0] 99 | tconf [b][best_n][gj][gi] = iou if self.rescore else 1. 100 | 101 | if iou > 0.5: 102 | nRecall += 1 103 | if iou > 0.75: 104 | nRecall75 += 1 105 | 106 | return nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 107 | 108 | def forward(self, output, target): 109 | #output : BxAs*(4+1+num_classes)*H*W 110 | mask_tuple = self.get_mask_boxes(output) 111 | t0 = time.time() 112 | nB = output.data.size(0) # batch size 113 | nA = mask_tuple['n'].item() # num_anchors 114 | nC = self.num_classes 115 | nH = output.data.size(2) 116 | nW = output.data.size(3) 117 | anchor_step = mask_tuple['a'].size(0)//nA 118 | anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device) 119 | cls_anchor_dim = nB*nA*nH*nW 120 | 121 | output = output.view(nB, nA, (5+nC), nH, nW) 122 | cls_grid = torch.linspace(5,5+nC-1,nC).long().to(self.device) 123 | ix = torch.LongTensor(range(0,5)).to(self.device) 124 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 125 | 126 | coord = output.index_select(2, ix[0:4]).view(nB*nA, -1, nH*nW).transpose(0,1).contiguous().view(-1,cls_anchor_dim) # x, y, w, h 127 | coord[0:2] = coord[0:2].sigmoid() # x, y 128 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 129 | cls = output.index_select(2, cls_grid) 130 | cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(cls_anchor_dim, nC) 131 | 132 | t1 = time.time() 133 | grid_x = torch.linspace(0, nW-1, nW).repeat(nB*nA, nH, 1).view(cls_anchor_dim).to(self.device) 134 | grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(cls_anchor_dim).to(self.device) 135 | anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 136 | anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 137 | 138 | pred_boxes[0] = coord[0] 
+ grid_x 139 | pred_boxes[1] = coord[1] + grid_y 140 | pred_boxes[2] = coord[2].exp() * anchor_w 141 | pred_boxes[3] = coord[3].exp() * anchor_h 142 | # for build_targets. it works faster on CPU than on GPU 143 | pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)).detach() 144 | 145 | t2 = time.time() 146 | nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 147 | self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW) 148 | 149 | cls_mask = (cls_mask == 1) 150 | tcls = tcls[cls_mask].long().view(-1) 151 | cls_mask = cls_mask.view(-1, 1).repeat(1,nC).to(self.device) 152 | cls = cls[cls_mask].view(-1, nC) 153 | 154 | nProposals = int((conf > 0.25).sum()) 155 | 156 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 157 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 158 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.to(self.device) 159 | 160 | t3 = time.time() 161 | loss_coord = nn.MSELoss(size_average=False)(coord*coord_mask, tcoord*coord_mask)/2 162 | loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask) 163 | loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 164 | loss = loss_coord + loss_conf + loss_cls 165 | 166 | t4 = time.time() 167 | if False: 168 | print('-'*30) 169 | print(' activation : %f' % (t1 - t0)) 170 | print(' create pred_boxes : %f' % (t2 - t1)) 171 | print(' build targets : %f' % (t3 - t2)) 172 | print(' create loss : %f' % (t4 - t3)) 173 | print(' total : %f' % (t4 - t0)) 174 | print('%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 175 | % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals, loss_coord, loss_conf, loss_cls, loss)) 176 | if math.isnan(loss.item()): 177 | print(conf, tconf) 178 | sys.exit(0) 179 | return loss 180 | -------------------------------------------------------------------------------- /detector/YOLO3/yolo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import torch 5 | import numpy as np 6 | from PIL import Image, ImageDraw, ImageFont 7 | import struct # get_image_size 8 | import imghdr # get_image_size 9 | from .nms import boxes_nms 10 | 11 | 12 | def sigmoid(x): 13 | return 1.0 / (math.exp(-x) + 1.) 
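# Annotation (not in the original source): these scalar helpers are legacy utilities; the
# vectorised hot path below uses torch.sigmoid and torch.nn.Softmax directly. A quick sanity
# check, assuming PyTorch is installed:
#     >>> import torch
#     >>> abs(sigmoid(0.5) - torch.sigmoid(torch.tensor(0.5)).item()) < 1e-6
#     True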
14 | 15 | 16 | def softmax(x): 17 | x = torch.exp(x - torch.max(x)) 18 | x = x / x.sum() 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | if x1y1x2y2: 24 | x1_min = min(box1[0], box2[0]) 25 | x2_max = max(box1[2], box2[2]) 26 | y1_min = min(box1[1], box2[1]) 27 | y2_max = max(box1[3], box2[3]) 28 | w1, h1 = box1[2] - box1[0], box1[3] - box1[1] 29 | w2, h2 = box2[2] - box2[0], box2[3] - box2[1] 30 | else: 31 | w1, h1 = box1[2], box1[3] 32 | w2, h2 = box2[2], box2[3] 33 | x1_min = min(box1[0] - w1 / 2.0, box2[0] - w2 / 2.0) 34 | x2_max = max(box1[0] + w1 / 2.0, box2[0] + w2 / 2.0) 35 | y1_min = min(box1[1] - h1 / 2.0, box2[1] - h2 / 2.0) 36 | y2_max = max(box1[1] + h1 / 2.0, box2[1] + h2 / 2.0) 37 | 38 | w_union = x2_max - x1_min 39 | h_union = y2_max - y1_min 40 | w_cross = w1 + w2 - w_union 41 | h_cross = h1 + h2 - h_union 42 | carea = 0 43 | if w_cross <= 0 or h_cross <= 0: 44 | return 0.0 45 | 46 | area1 = w1 * h1 47 | area2 = w2 * h2 48 | carea = w_cross * h_cross 49 | uarea = area1 + area2 - carea 50 | return float(carea / uarea) 51 | 52 | 53 | def multi_bbox_ious(boxes1, boxes2, x1y1x2y2=True): 54 | if x1y1x2y2: 55 | x1_min = torch.min(boxes1[0], boxes2[0]) 56 | x2_max = torch.max(boxes1[2], boxes2[2]) 57 | y1_min = torch.min(boxes1[1], boxes2[1]) 58 | y2_max = torch.max(boxes1[3], boxes2[3]) 59 | w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1] 60 | w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1] 61 | else: 62 | w1, h1 = boxes1[2], boxes1[3] 63 | w2, h2 = boxes2[2], boxes2[3] 64 | x1_min = torch.min(boxes1[0] - w1 / 2.0, boxes2[0] - w2 / 2.0) 65 | x2_max = torch.max(boxes1[0] + w1 / 2.0, boxes2[0] + w2 / 2.0) 66 | y1_min = torch.min(boxes1[1] - h1 / 2.0, boxes2[1] - h2 / 2.0) 67 | y2_max = torch.max(boxes1[1] + h1 / 2.0, boxes2[1] + h2 / 2.0) 68 | 69 | w_union = x2_max - x1_min 70 | h_union = y2_max - y1_min 71 | w_cross = w1 + w2 - w_union 72 | h_cross = h1 + h2 - h_union 73 | mask = (((w_cross <= 0) + (h_cross <= 0)) > 0) 74 | area1 = w1 * h1 75 | area2 = w2 * h2 76 | carea = w_cross * h_cross 77 | carea[mask] = 0 78 | uarea = area1 + area2 - carea 79 | return carea / uarea 80 | 81 | 82 | def post_process(boxes, num_classes, conf_thresh=0.01, nms_thresh=0.45, obj_thresh=0.3): 83 | batch_size = boxes.size(0) 84 | 85 | # nms 86 | results_boxes = [] 87 | for batch_id in range(batch_size): 88 | processed_boxes = [] 89 | for cls_id in range(num_classes): 90 | mask = (boxes[batch_id, :, -1] == cls_id) * (boxes[batch_id, :, 4] > obj_thresh) 91 | masked_boxes = boxes[batch_id, mask] 92 | 93 | keep = boxes_nms(masked_boxes[:, :4], masked_boxes[:, 5], nms_thresh) 94 | 95 | nmsed_boxes = masked_boxes[keep, :] 96 | 97 | processed_boxes.append(nmsed_boxes) 98 | processed_boxes = torch.cat(processed_boxes, dim=0) 99 | 100 | results_boxes.append(processed_boxes) 101 | 102 | return results_boxes 103 | 104 | 105 | def xywh_to_xyxy(boxes_xywh): 106 | boxes_xyxy = boxes_xywh.copy() 107 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 108 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 109 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 110 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 111 | 112 | return boxes_xyxy 113 | 114 | 115 | def xyxy_to_xywh(boxes_xyxy): 116 | if isinstance(boxes_xyxy, torch.Tensor): 117 | boxes_xywh = boxes_xyxy.clone() 118 | elif isinstance(boxes_xyxy, np.ndarray): 119 | boxes_xywh = boxes_xyxy.copy() 120 | 121 | boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2. 
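    # Annotation (not in the original source): the conversion continues below with the
    # centre-y, width and height columns. Caution: the companion xywh_to_xyxy() defined above
    # appears to overwrite only column 0 four times; a corrected sketch, assuming the usual
    # (cx, cy, w, h) -> (x1, y1, x2, y2) convention, would be:
    #     boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2.
    #     boxes_xyxy[:, 1] = boxes_xywh[:, 1] - boxes_xywh[:, 3] / 2.
    #     boxes_xyxy[:, 2] = boxes_xywh[:, 0] + boxes_xywh[:, 2] / 2.
    #     boxes_xyxy[:, 3] = boxes_xywh[:, 1] + boxes_xywh[:, 3] / 2.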
122 | boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2. 123 | boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0] 124 | boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1] 125 | 126 | return boxes_xywh 127 | 128 | 129 | def nms(boxes, nms_thresh): 130 | if len(boxes) == 0: 131 | return boxes 132 | 133 | det_confs = torch.zeros(len(boxes)) 134 | print(boxes.shape) 135 | for i in range(len(boxes)): 136 | det_confs[i] = boxes[i][4] 137 | 138 | _, sortIds = torch.sort(det_confs, descending=True) 139 | out_boxes = [] 140 | for i in range(len(boxes)): 141 | box_i = boxes[sortIds[i]] 142 | if box_i[4] > 0: 143 | out_boxes.append(box_i) 144 | for j in range(i + 1, len(boxes)): 145 | box_j = boxes[sortIds[j]] 146 | if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: 147 | box_j[4] = 0 148 | return out_boxes 149 | 150 | 151 | def convert2cpu(gpu_matrix): 152 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 153 | 154 | 155 | def convert2cpu_long(gpu_matrix): 156 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 157 | 158 | 159 | def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): 160 | # total number of inputs (batch size) 161 | # first element (x) for first tuple (x, anchor_mask, num_anchor) 162 | batchsize = output[0]['x'].data.size(0) 163 | 164 | all_boxes = [] 165 | for i in range(len(output)): 166 | pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() 167 | boxes = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, 168 | only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) 169 | 170 | all_boxes.append(boxes) 171 | return torch.cat(all_boxes, dim=1) 172 | 173 | 174 | def get_region_boxes(output, obj_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, 175 | use_cuda=True): 176 | device = torch.device("cuda" if use_cuda else "cpu") 177 | anchors = anchors.to(device) 178 | anchor_step = anchors.size(0) // num_anchors 179 | if output.dim() == 3: 180 | output = output.unsqueeze(0) 181 | batch = output.size(0) 182 | assert (output.size(1) == (5 + num_classes) * num_anchors) 183 | h = output.size(2) 184 | w = output.size(3) 185 | cls_anchor_dim = batch * num_anchors * h * w 186 | 187 | # all_boxes = [] 188 | output = output.view(batch * num_anchors, 5 + num_classes, h * w).transpose(0, 1).contiguous().view(5 + num_classes, 189 | cls_anchor_dim) 190 | 191 | grid_x = torch.linspace(0, w - 1, w).repeat(batch * num_anchors, h, 1).view(cls_anchor_dim).to(device) 192 | grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(batch * num_anchors, 1, 1).view(cls_anchor_dim).to( 193 | device) 194 | ix = torch.LongTensor(range(0, 2)).to(device) 195 | anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h * w).view( 196 | cls_anchor_dim) 197 | anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, batch, h * w).view( 198 | cls_anchor_dim) 199 | 200 | xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y 201 | ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() 202 | det_confs = torch.sigmoid(output[4]) 203 | 204 | # by ysyun, dim=1 means input is 2D or even dimension else dim=0 205 | cls_confs = torch.nn.Softmax(dim=1)(output[5:5 + num_classes].transpose(0, 1)).detach() 206 | cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 207 | cls_max_confs = cls_max_confs.view(-1) 208 | 
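    # Annotation (not in the original source): the argmax class id is kept as a float tensor
    # below so it can be stacked with the box coordinates and confidences into one
    # (batch, num_anchors*h*w, 7) tensor: [x1, y1, x2, y2, det_conf, det_conf*cls_conf, cls_id].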
cls_max_ids = cls_max_ids.view(-1).float() 209 | 210 | # sz_hw = h*w 211 | # sz_hwa = sz_hw*num_anchors 212 | # det_confs = convert2cpu(det_confs) 213 | # cls_max_confs = convert2cpu(cls_max_confs) 214 | # cls_max_ids = convert2cpu_long(cls_max_ids) 215 | # xs, ys = convert2cpu(xs), convert2cpu(ys) 216 | # ws, hs = convert2cpu(ws), convert2cpu(hs) 217 | 218 | cls_confs = det_confs * cls_max_confs 219 | 220 | # boxes = [xs/w, ys/h, ws/w, hs/h, det_confs, cls_confs, cls_max_ids] 221 | xs, ys, ws, hs = xs / w, ys / h, ws / w, hs / h 222 | x1, y1, x2, y2 = torch.clamp_min(xs - ws / 2., 0.), torch.clamp_min(ys - hs / 2., 0.), torch.clamp_max(xs + ws / 2., 223 | 1.), torch.clamp_max( 224 | ys + hs / 2., 1.) 225 | boxes = [x1, y1, x2, y2, det_confs, cls_confs, cls_max_ids] 226 | boxes = list(map(lambda x: x.view(batch, -1), boxes)) 227 | boxes = torch.stack(boxes, dim=2) 228 | 229 | # for b in range(batch): 230 | # boxes = [] 231 | # for cy in range(h): 232 | # for cx in range(w): 233 | # for i in range(num_anchors): 234 | # ind = b*sz_hwa + i*sz_hw + cy*w + cx 235 | # det_conf = det_confs[ind] 236 | # if only_objectness: 237 | # conf = det_confs[ind] 238 | # else: 239 | # conf = det_confs[ind] * cls_max_confs[ind] 240 | 241 | # if conf > conf_thresh: 242 | # bcx = xs[ind] 243 | # bcy = ys[ind] 244 | # bw = ws[ind] 245 | # bh = hs[ind] 246 | # cls_max_conf = cls_max_confs[ind] 247 | # cls_max_id = cls_max_ids[ind] 248 | # box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 249 | 250 | # boxes.append(box) 251 | # all_boxes.append(boxes) 252 | return boxes 253 | 254 | 255 | # def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): 256 | # # total number of inputs (batch size) 257 | # # first element (x) for first tuple (x, anchor_mask, num_anchor) 258 | # tot = output[0]['x'].data.size(0) 259 | # all_boxes = [[] for i in range(tot)] 260 | # for i in range(len(output)): 261 | # pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() 262 | # b = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, \ 263 | # only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) 264 | # for t in range(tot): 265 | # all_boxes[t] += b[t] 266 | # return all_boxes 267 | 268 | # def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, use_cuda=True): 269 | # device = torch.device("cuda" if use_cuda else "cpu") 270 | # anchors = anchors.to(device) 271 | # anchor_step = anchors.size(0)//num_anchors 272 | # if output.dim() == 3: 273 | # output = output.unsqueeze(0) 274 | # batch = output.size(0) 275 | # assert(output.size(1) == (5+num_classes)*num_anchors) 276 | # h = output.size(2) 277 | # w = output.size(3) 278 | # cls_anchor_dim = batch*num_anchors*h*w 279 | 280 | # t0 = time.time() 281 | # all_boxes = [] 282 | # output = output.view(batch*num_anchors, 5+num_classes, h*w).transpose(0,1).contiguous().view(5+num_classes, cls_anchor_dim) 283 | 284 | # grid_x = torch.linspace(0, w-1, w).repeat(batch*num_anchors, h, 1).view(cls_anchor_dim).to(device) 285 | # grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(batch*num_anchors, 1, 1).view(cls_anchor_dim).to(device) 286 | # ix = torch.LongTensor(range(0,2)).to(device) 287 | # anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h*w).view(cls_anchor_dim) 288 | # anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, 
batch, h*w).view(cls_anchor_dim) 289 | 290 | # xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y 291 | # ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() 292 | # det_confs = torch.sigmoid(output[4]) 293 | 294 | # # by ysyun, dim=1 means input is 2D or even dimension else dim=0 295 | # cls_confs = torch.nn.Softmax(dim=1)(output[5:5+num_classes].transpose(0,1)).detach() 296 | # cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 297 | # cls_max_confs = cls_max_confs.view(-1) 298 | # cls_max_ids = cls_max_ids.view(-1) 299 | # t1 = time.time() 300 | 301 | # sz_hw = h*w 302 | # sz_hwa = sz_hw*num_anchors 303 | # det_confs = convert2cpu(det_confs) 304 | # cls_max_confs = convert2cpu(cls_max_confs) 305 | # cls_max_ids = convert2cpu_long(cls_max_ids) 306 | # xs, ys = convert2cpu(xs), convert2cpu(ys) 307 | # ws, hs = convert2cpu(ws), convert2cpu(hs) 308 | # if validation: 309 | # cls_confs = convert2cpu(cls_confs.view(-1, num_classes)) 310 | 311 | # t2 = time.time() 312 | # for b in range(batch): 313 | # boxes = [] 314 | # for cy in range(h): 315 | # for cx in range(w): 316 | # for i in range(num_anchors): 317 | # ind = b*sz_hwa + i*sz_hw + cy*w + cx 318 | # det_conf = det_confs[ind] 319 | # if only_objectness: 320 | # conf = det_confs[ind] 321 | # else: 322 | # conf = det_confs[ind] * cls_max_confs[ind] 323 | 324 | # if conf > conf_thresh: 325 | # bcx = xs[ind] 326 | # bcy = ys[ind] 327 | # bw = ws[ind] 328 | # bh = hs[ind] 329 | # cls_max_conf = cls_max_confs[ind] 330 | # cls_max_id = cls_max_ids[ind] 331 | # box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 332 | # if (not only_objectness) and validation: 333 | # for c in range(num_classes): 334 | # tmp_conf = cls_confs[ind][c] 335 | # if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh: 336 | # box.append(tmp_conf) 337 | # box.append(c) 338 | # boxes.append(box) 339 | # all_boxes.append(boxes) 340 | # t3 = time.time() 341 | # if False: 342 | # print('---------------------------------') 343 | # print('matrix computation : %f' % (t1-t0)) 344 | # print(' gpu to cpu : %f' % (t2-t1)) 345 | # print(' boxes filter : %f' % (t3-t2)) 346 | # print('---------------------------------') 347 | # return all_boxes 348 | 349 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 350 | import cv2 351 | colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) 352 | 353 | def get_color(c, x, max_val): 354 | ratio = float(x) / max_val * 5 355 | i = int(math.floor(ratio)) 356 | j = int(math.ceil(ratio)) 357 | ratio = ratio - i 358 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 359 | return int(r * 255) 360 | 361 | width = img.shape[1] 362 | height = img.shape[0] 363 | for i in range(len(boxes)): 364 | box = boxes[i] 365 | x1 = int(round((box[0] - box[2] / 2.0) * width)) 366 | y1 = int(round((box[1] - box[3] / 2.0) * height)) 367 | x2 = int(round((box[0] + box[2] / 2.0) * width)) 368 | y2 = int(round((box[1] + box[3] / 2.0) * height)) 369 | 370 | if color: 371 | rgb = color 372 | else: 373 | rgb = (255, 0, 0) 374 | if len(box) >= 7 and class_names: 375 | cls_conf = box[5] 376 | cls_id = box[6] 377 | # print('%s: %f' % (class_names[cls_id], cls_conf)) 378 | classes = len(class_names) 379 | offset = cls_id * 123457 % classes 380 | red = get_color(2, offset, classes) 381 | green = get_color(1, offset, classes) 382 | blue = get_color(0, offset, classes) 383 | if color is None: 384 | rgb = (red, green, 
blue) 385 | img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1) 386 | img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1) 387 | if savename: 388 | print("save plot results to %s" % savename) 389 | cv2.imwrite(savename, img) 390 | return img 391 | 392 | 393 | def plot_boxes(img, boxes, savename=None, class_names=None): 394 | colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) 395 | 396 | def get_color(c, x, max_val): 397 | ratio = float(x) / max_val * 5 398 | i = int(math.floor(ratio)) 399 | j = int(math.ceil(ratio)) 400 | ratio = ratio - i 401 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 402 | return int(r * 255) 403 | 404 | width = img.width 405 | height = img.height 406 | draw = ImageDraw.Draw(img) 407 | print("%d box(es) is(are) found" % len(boxes)) 408 | for i in range(len(boxes)): 409 | box = boxes[i] 410 | x1 = (box[0] - box[2] / 2.0) * width 411 | y1 = (box[1] - box[3] / 2.0) * height 412 | x2 = (box[0] + box[2] / 2.0) * width 413 | y2 = (box[1] + box[3] / 2.0) * height 414 | 415 | rgb = (255, 0, 0) 416 | if len(box) >= 7 and class_names: 417 | cls_conf = box[5] 418 | cls_id = box[6] 419 | print('%s: %f' % (class_names[cls_id], cls_conf)) 420 | classes = len(class_names) 421 | offset = cls_id * 123457 % classes 422 | red = get_color(2, offset, classes) 423 | green = get_color(1, offset, classes) 424 | blue = get_color(0, offset, classes) 425 | rgb = (red, green, blue) 426 | draw.text((x1, y1), class_names[cls_id], fill=rgb) 427 | draw.rectangle([x1, y1, x2, y2], outline=rgb) 428 | if savename: 429 | print("save plot results to %s" % savename) 430 | img.save(savename) 431 | return img 432 | 433 | 434 | def read_truths(lab_path): 435 | if not os.path.exists(lab_path): 436 | return np.array([]) 437 | if os.path.getsize(lab_path): 438 | truths = np.loadtxt(lab_path) 439 | truths = truths.reshape(truths.size // 5, 5) # to avoid single truth problem 440 | return truths 441 | else: 442 | return np.array([]) 443 | 444 | 445 | def read_truths_args(lab_path, min_box_scale): 446 | truths = read_truths(lab_path) 447 | new_truths = [] 448 | for i in range(truths.shape[0]): 449 | if truths[i][3] < min_box_scale: 450 | continue 451 | new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]]) 452 | return np.array(new_truths) 453 | 454 | 455 | def load_class_names(namesfile): 456 | class_names = [] 457 | with open(namesfile, 'r', encoding='utf8') as fp: 458 | lines = fp.readlines() 459 | for line in lines: 460 | class_names.append(line.strip()) 461 | return class_names 462 | 463 | 464 | def image2torch(img): 465 | if isinstance(img, Image.Image): 466 | width = img.width 467 | height = img.height 468 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 469 | img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous() 470 | img = img.view(1, 3, height, width) 471 | img = img.float().div(255.0) 472 | elif type(img) == np.ndarray: # cv2 image 473 | img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) 474 | else: 475 | print("unknown image type") 476 | exit(-1) 477 | return img 478 | 479 | 480 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=True): 481 | model.eval() 482 | t0 = time.time() 483 | img = image2torch(img) 484 | t1 = time.time() 485 | 486 | img = img.to(torch.device("cuda" if use_cuda else "cpu")) 487 | t2 = time.time() 488 | 489 | out_boxes = model(img) 490 | boxes = get_all_boxes(out_boxes, 
conf_thresh, model.num_classes, use_cuda=use_cuda)[0] 491 | 492 | t3 = time.time() 493 | boxes = nms(boxes, nms_thresh) 494 | t4 = time.time() 495 | 496 | if False: 497 | print('-----------------------------------') 498 | print(' image to tensor : %f' % (t1 - t0)) 499 | print(' tensor to cuda : %f' % (t2 - t1)) 500 | print(' predict : %f' % (t3 - t2)) 501 | print(' nms : %f' % (t4 - t3)) 502 | print(' total : %f' % (t4 - t0)) 503 | print('-----------------------------------') 504 | return boxes 505 | 506 | 507 | def read_data_cfg(datacfg): 508 | options = dict() 509 | options['gpus'] = '0,1,2,3' 510 | options['num_workers'] = '10' 511 | with open(datacfg, 'r') as fp: 512 | lines = fp.readlines() 513 | 514 | for line in lines: 515 | line = line.strip() 516 | if line == '': 517 | continue 518 | key, value = line.split('=') 519 | key = key.strip() 520 | value = value.strip() 521 | options[key] = value 522 | return options 523 | 524 | 525 | def scale_bboxes(bboxes, width, height): 526 | import copy 527 | dets = copy.deepcopy(bboxes) 528 | for i in range(len(dets)): 529 | dets[i][0] = dets[i][0] * width 530 | dets[i][1] = dets[i][1] * height 531 | dets[i][2] = dets[i][2] * width 532 | dets[i][3] = dets[i][3] * height 533 | return dets 534 | 535 | 536 | def file_lines(thefilepath): 537 | count = 0 538 | thefile = open(thefilepath, 'rb') 539 | while True: 540 | buffer = thefile.read(8192 * 1024) 541 | if not buffer: 542 | break 543 | count += buffer.count(b'\n') 544 | thefile.close() 545 | return count 546 | 547 | 548 | def get_image_size(fname): 549 | '''Determine the image type of fhandle and return its size. 550 | from draco''' 551 | with open(fname, 'rb') as fhandle: 552 | head = fhandle.read(24) 553 | if len(head) != 24: 554 | return 555 | if imghdr.what(fname) == 'png': 556 | check = struct.unpack('>i', head[4:8])[0] 557 | if check != 0x0d0a1a0a: 558 | return 559 | width, height = struct.unpack('>ii', head[16:24]) 560 | elif imghdr.what(fname) == 'gif': 561 | width, height = struct.unpack('H', fhandle.read(2))[0] - 2 574 | # We are at a SOFn block 575 | fhandle.seek(1, 1) # Skip `precision' byte. 
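                # Annotation (not in the original source): a JPEG SOFn segment stores, after its
                # 2-byte length, a 1-byte sample precision followed by height and width as
                # big-endian uint16 values — hence the seek(1, 1) above and the '>HH' unpack below.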
576 | height, width = struct.unpack('>HH', fhandle.read(4)) 577 | except Exception: # IGNORE:W0703 578 | return 579 | else: 580 | return 581 | return width, height 582 | 583 | 584 | def logging(message): 585 | print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message)) 586 | -------------------------------------------------------------------------------- /detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .YOLO3 import YOLOv3 2 | 3 | 4 | __all__ = ['build_detector'] 5 | 6 | 7 | def build_detector(cfg, use_cuda): 8 | return YOLOv3(cfg.YOLOV3.CFG, cfg.YOLOV3.WEIGHT, cfg.YOLOV3.CLASS_NAMES, 9 | score_thresh=cfg.YOLOV3.SCORE_THRESH, nms_thresh=cfg.YOLOV3.NMS_THRESH, 10 | is_xywh=True, use_cuda=use_cuda) 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | pillow 4 | torch 5 | torchvision 6 | opencv-python 7 | scikit-learn 8 | vizer 9 | pyyaml 10 | easydict 11 | matplotlib 12 | django>=2.0 13 | tqdm 14 | cos-python-sdk-v5 15 | cython 16 | ffmpy3 17 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/utils/__init__.py -------------------------------------------------------------------------------- /utils/dataset_reconstruct.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/1 4 | Desc: 数据集重构为按不同行人划分文件夹的格式 5 | """ 6 | import os 7 | import shutil 8 | import re 9 | import tqdm 10 | 11 | 12 | def reconstruct_market1501(source_path, generate_path): 13 | """ 14 | 重构MARKET数据集为不同的行人在不同的文件夹下(MARS数据集就是这种格式,无需重构) 15 | """ 16 | img_names = os.listdir(source_path) 17 | pattern = re.compile(r'([-\d]+)_c(\d)') 18 | for img_name in tqdm.tqdm(img_names): 19 | if '.jpg' not in img_name: 20 | continue 21 | # pid: 每个人的标签编号 1 22 | # _ : 摄像头号 2 23 | pid, _ = map(int, pattern.search(img_name).groups()) 24 | # 去掉没用的图片 25 | if pid == 0 or pid == -1: 26 | # 不处理的无用图片 27 | continue 28 | target_folder = os.path.join(generate_path, str(pid)) 29 | if not os.path.exists(target_folder): 30 | os.makedirs(target_folder) 31 | shutil.copy(os.path.join(source_path, img_name), os.path.join(target_folder, img_name)) 32 | 33 | 34 | if __name__ == '__main__': 35 | src_dir = r'data/Market-1501-v15.09.15/' 36 | target_dir = r'data/Market-generated/' 37 | reconstruct_market1501(src_dir, target_dir) -------------------------------------------------------------------------------- /utils/dataset_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import random 5 | import tqdm 6 | 7 | 8 | data_folder = '../dataset/MARS/' 9 | train_folder = '../dataset/MARS-generated/bbox_train/' 10 | test_folder = '../dataset/MARS-generated/bbox_test/' 11 | 12 | 13 | def check_folder(): 14 | if not os.path.exists(data_folder): 15 | os.mkdir(data_folder) 16 | if not os.path.exists(train_folder): 17 | os.mkdir(train_folder) 18 | if not os.path.exists(test_folder): 19 | os.mkdir(test_folder) 20 | 21 | 22 | def split_dataset(): 23 | """ 24 | 划分训练集和测试集 25 | :return: 26 | """ 27 | raw_data_folder = '../dataset/MARS/' 28 | categories = 
os.listdir(raw_data_folder) 29 | label_list = [] 30 | for category in tqdm.tqdm(categories): 31 | label = categories.index(category) 32 | label_list.append(label) 33 | category_folder = os.path.join(raw_data_folder, category) 34 | files = glob.glob(category_folder + '/*.jpg') 35 | random.shuffle(files) 36 | train_size = int(0.8 * len(files)) 37 | test_size = int(0.2 * len(files)) 38 | train_files = files[:train_size] 39 | test_files = files[train_size:] 40 | out_path = os.path.join(train_folder, str(label)) 41 | if not os.path.exists(out_path): 42 | os.mkdir(out_path) 43 | for img in train_files: 44 | shutil.copy(img, os.path.join(out_path, os.path.split(img)[-1])) 45 | out_path = os.path.join(test_folder, str(label)) 46 | if not os.path.exists(out_path): 47 | os.mkdir(out_path) 48 | for img in test_files: 49 | shutil.copy(img, os.path.join(out_path, os.path.split(img)[-1])) 50 | 51 | 52 | if __name__ == '__main__': 53 | check_folder() 54 | split_dataset() 55 | -------------------------------------------------------------------------------- /utils/draw_bbox.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 4 | 5 | 6 | def compute_color_for_labels(label): 7 | """ 8 | 标签颜色生成,尽量保证不同id对应的bbox框颜色不同 9 | """ 10 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 11 | return tuple(color) 12 | 13 | 14 | def draw_boxes(img, bbox, identities=None, offset=(0, 0)): 15 | """ 16 | 绘制bbox框在视频上 17 | Parameters 18 | ---------- 19 | img 20 | bbox 21 | identities 22 | offset 23 | 24 | Returns 25 | ------- 26 | 27 | """ 28 | for i, box in enumerate(bbox): 29 | x1, y1, x2, y2 = [int(i) for i in box] 30 | x1 += offset[0] 31 | x2 += offset[0] 32 | y1 += offset[1] 33 | y2 += offset[1] 34 | # box text and bar 35 | id = int(identities[i]) if identities is not None else 0 36 | color = compute_color_for_labels(id) 37 | label = '{}{:d}'.format("", id) 38 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] 39 | cv2.rectangle(img, (x1, y1), (x2, y2), color, 3) 40 | cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1) 41 | cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) 42 | return img 43 | 44 | -------------------------------------------------------------------------------- /utils/format_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/4/2 4 | Desc: 进行视频格式和编码的转换,需要安装ffmpeg包并加入当前环境的环境变量 5 | """ 6 | from ffmpy3 import FFmpeg 7 | 8 | 9 | def avi2mp4(source_path: str, target_path:str): 10 | print("start transformation") 11 | ff = FFmpeg( 12 | inputs={source_path: '-f avi'}, 13 | outputs={target_path: '-f mp4 -y'} 14 | ) 15 | print(ff.cmd) 16 | ff.run() 17 | print("finish transformation") 18 | 19 | 20 | if __name__ == '__main__': 21 | # 测试脚本 22 | avi2mp4("../result/result.avi", "../result/result.mp4") -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | 6 | class YamlParser(edict): 7 | def __init__(self, cfg_dict=None, config_file=None): 8 | if cfg_dict is None: 9 | cfg_dict = {} 10 | 11 | if config_file is not None: 12 | assert (os.path.isfile(config_file)) 13 | with open(config_file, 'r') as fo: 14 | 
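                # Annotation (not in the original source): yaml.load() without an explicit Loader
                # raises a deprecation warning on PyYAML >= 5.1; yaml.safe_load(fo) or
                # yaml.load(fo, Loader=yaml.FullLoader) — as merge_from_file below already does —
                # is the safer call.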
cfg_dict.update(yaml.load(fo.read())) 15 | 16 | super(YamlParser, self).__init__(cfg_dict) 17 | 18 | def merge_from_file(self, config_file): 19 | with open(config_file, 'r', encoding="utf8") as fo: 20 | self.update(yaml.load(fo.read(), Loader=yaml.FullLoader)) 21 | 22 | def merge_from_dict(self, config_dict): 23 | self.update(config_dict) 24 | 25 | 26 | def parse_config(config_file=None): 27 | return YamlParser(config_file=config_file) 28 | 29 | 30 | if __name__ == "__main__": 31 | cfg = YamlParser(config_file="../configs/yolov3.yml") 32 | cfg.merge_from_file("../configs/deepsort.yml") 33 | print(cfg) 34 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | Demo网站 -------------------------------------------------------------------------------- /web/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/17 4 | Desc: desc 5 | """ -------------------------------------------------------------------------------- /web/db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/db.sqlite3 -------------------------------------------------------------------------------- /web/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? Did you " 15 | "forget to activate a virtual environment?" 16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /web/static/images/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/static/images/bg.png -------------------------------------------------------------------------------- /web/templates/show_images.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 显示图片 8 | 9 | 10 | 11 | 12 |     13 | 14 |
15 |

目标跟踪结果

16 | {% for image in images %} 17 | 18 | {% endfor %} 19 |
20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /web/templates/show_video.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {#依赖jQueryVideoJS实现#} 6 | 7 | 显示跟踪视频 8 | 9 | 10 | 11 | 12 | 13 | 14 |     15 | 16 |
17 |

目标跟踪结果

18 | {% load static %} 19 |
20 | 24 |
25 |
26 | 27 | 28 | 29 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /web/templates/upload.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 多目标跟踪演示 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 |

多目标跟踪演示

16 |
17 | {% csrf_token %} 18 |
19 | 20 |
21 | 22 |
23 |
24 | 25 |
26 |
27 |
28 | 29 |
30 |
31 | 32 |
33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /web/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/web/__init__.py -------------------------------------------------------------------------------- /web/web/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for web project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /web/web/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for web project. 3 | 4 | Generated by 'django-admin startproject' using Django 3.0.4. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/3.0/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '3srrhz1#(#ebb%&0+_$mkpob2(^+&=19@7moir-jm3w3ma%#pm' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
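# Annotation (not in the original source): DEBUG = True, ALLOWED_HOSTS = ['*'] and the
# hard-coded SECRET_KEY above are only acceptable for this local demo; a real deployment
# should load the secret from the environment, disable DEBUG and restrict the allowed hosts.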
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = ['*'] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'web' 41 | 42 | ] 43 | 44 | MIDDLEWARE = [ 45 | 'django.middleware.security.SecurityMiddleware', 46 | 'django.contrib.sessions.middleware.SessionMiddleware', 47 | 'django.middleware.common.CommonMiddleware', 48 | 'django.middleware.csrf.CsrfViewMiddleware', 49 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 50 | 'django.contrib.messages.middleware.MessageMiddleware', 51 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 52 | ] 53 | 54 | ROOT_URLCONF = 'web.urls' 55 | 56 | TEMPLATES = [ 57 | { 58 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 59 | 'DIRS': [os.path.join(BASE_DIR, 'templates')], 60 | 'APP_DIRS': True, 61 | 'OPTIONS': { 62 | 'context_processors': [ 63 | 'django.template.context_processors.debug', 64 | 'django.template.context_processors.request', 65 | 'django.contrib.auth.context_processors.auth', 66 | 'django.contrib.messages.context_processors.messages', 67 | 'django.template.context_processors.media', 68 | ], 69 | }, 70 | }, 71 | ] 72 | 73 | WSGI_APPLICATION = 'web.wsgi.application' 74 | 75 | 76 | # Database 77 | # https://docs.djangoproject.com/en/3.0/ref/settings/#databases 78 | 79 | DATABASES = { 80 | 'default': { 81 | 'ENGINE': 'django.db.backends.sqlite3', 82 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 83 | } 84 | } 85 | 86 | 87 | # Password validation 88 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators 89 | 90 | AUTH_PASSWORD_VALIDATORS = [ 91 | { 92 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 93 | }, 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 96 | }, 97 | { 98 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 102 | }, 103 | ] 104 | 105 | 106 | # Internationalization 107 | # https://docs.djangoproject.com/en/3.0/topics/i18n/ 108 | 109 | LANGUAGE_CODE = 'zh-hans' 110 | 111 | TIME_ZONE = 'Asia/Shanghai' 112 | 113 | USE_I18N = True 114 | 115 | USE_L10N = True 116 | 117 | USE_TZ = True 118 | 119 | 120 | # Static files (CSS, JavaScript, Images) 121 | # https://docs.djangoproject.com/en/3.0/howto/static-files/ 122 | 123 | STATIC_URL = '/static/' 124 | STATIC_ROOT = os.path.join(BASE_DIR, 'static') 125 | # 设置图片等静态文件的路径 126 | STATICFILES_DIRS = ( 127 | ('images', os.path.join(STATIC_ROOT, 'images').replace('\\', '/')), 128 | ('upload', os.path.join(STATIC_ROOT, 'upload').replace('\\', '/')), 129 | ('videos', os.path.join(STATIC_ROOT, 'videos').replace('\\', '/')), 130 | ('css', os.path.join(STATIC_ROOT, 'css').replace('\\', '/')), 131 | ('js', os.path.join(STATIC_ROOT, 'js').replace('\\', '/')), 132 | ) 133 | 134 | MEDIA_URL = '/media/' 135 | MEDIA_ROOT = os.path.join(BASE_DIR, 'media') 136 | -------------------------------------------------------------------------------- /web/web/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.contrib.staticfiles.urls import staticfiles_urlpatterns 3 | from django.urls import path 4 | from . import views 5 | from . 
import settings 6 | from django.conf.urls.static import static 7 | from django.conf.urls import url 8 | 9 | urlpatterns = [ 10 | path('admin/', admin.site.urls), 11 | path('', views.upload), 12 | url('video/', views.stream_video, name="video") 13 | ] 14 | 15 | urlpatterns += staticfiles_urlpatterns() 16 | urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) 17 | -------------------------------------------------------------------------------- /web/web/views.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/17 4 | Desc: desc 5 | """ 6 | from django.shortcuts import render 7 | import re 8 | import mimetypes 9 | from wsgiref.util import FileWrapper 10 | from django.http import StreamingHttpResponse 11 | import os 12 | import uuid 13 | from .settings import MEDIA_ROOT, STATIC_ROOT 14 | import sys 15 | sys.path.append("../") 16 | import yolo3_deepsort 17 | import utils.format_factory as ff 18 | 19 | 20 | def upload(request): 21 | if request.method == 'POST': 22 | files = request.FILES['video'] 23 | if len(files) > 0: 24 | if not os.path.exists(MEDIA_ROOT): 25 | # 若不存在媒体存储目录 26 | os.mkdir(MEDIA_ROOT) 27 | video = files 28 | extension = os.path.splitext(video.name)[1] 29 | # 重命名文件 30 | file_name = '{}{}'.format(uuid.uuid4(), extension) 31 | file_path = '{}/{}'.format(MEDIA_ROOT, file_name) 32 | # 保存文件到本机 33 | with open(file_path, 'wb') as f: 34 | for c in video.chunks(): 35 | f.write(c) 36 | # 视频保存本机之后调用模型 37 | 38 | args = yolo3_deepsort.Argument(file_path) 39 | args.output_path = os.path.join(STATIC_ROOT, 'videos', 'rst.avi') 40 | cfg = yolo3_deepsort.get_config() 41 | cfg.merge_from_file(args.config_detection) 42 | cfg.merge_from_file(args.config_deepsort) 43 | with yolo3_deepsort.VideoTracker(cfg, args, file_path) as vdo_trk: 44 | vdo_trk.run_with_limit(300) 45 | os.remove(os.path.join(STATIC_ROOT, 'videos', 'rst.mp4')) 46 | ff.avi2mp4(args.output_path, os.path.join(STATIC_ROOT, 'videos', 'rst.mp4')) 47 | 48 | return render(request, 'show_video.html', {'filename': 'rst.mp4'}) 49 | else: 50 | return render(request, 'upload.html') 51 | return render(request, 'upload.html') 52 | 53 | 54 | def file_iterator(file_name, chunk_size=8192, offset=0, length=None): 55 | with open(file_name, "rb") as f: 56 | f.seek(offset, os.SEEK_SET) 57 | remaining = length 58 | while True: 59 | bytes_length = chunk_size if remaining is None else min(remaining, chunk_size) 60 | data = f.read(bytes_length) 61 | if not data: 62 | break 63 | if remaining: 64 | remaining -= len(data) 65 | yield data 66 | 67 | 68 | def stream_video(request): 69 | path = request.GET.get('path') 70 | path = os.path.join("static", "videos", path) 71 | range_header = request.META.get('HTTP_RANGE', '').strip() 72 | range_re = re.compile(r'bytes\s*=\s*(\d+)\s*-\s*(\d*)', re.I) 73 | range_match = range_re.match(range_header) 74 | size = os.path.getsize(path) 75 | content_type, encoding = mimetypes.guess_type(path) 76 | content_type = content_type or 'application/octet-stream' 77 | if range_match: 78 | first_byte, last_byte = range_match.groups() 79 | first_byte = int(first_byte) if first_byte else 0 80 | last_byte = first_byte + 1024 * 1024 * 8 # 8M 每片,响应体最大体积 81 | if last_byte >= size: 82 | last_byte = size - 1 83 | length = last_byte - first_byte + 1 84 | resp = StreamingHttpResponse(file_iterator(path, offset=first_byte, length=length), status=206, 85 | content_type=content_type) 86 | resp['Content-Length'] = str(length) 87 | 
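        # Annotation (not in the original source): the Content-Range header set below tells the
        # client which byte window this 206 (Partial Content) response covers, which is what
        # allows the HTML5 video player to seek within the streamed file.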
88 |     else:
89 |         # no Range header: return the whole file through a generator to save memory
90 |         resp = StreamingHttpResponse(FileWrapper(open(path, 'rb')), content_type=content_type)
91 |         resp['Content-Length'] = str(size)
92 |     resp['Accept-Ranges'] = 'bytes'
93 |     return resp
--------------------------------------------------------------------------------
/web/web/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for web project.
3 | 
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 | 
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/
8 | """
9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings')
15 | 
16 | application = get_wsgi_application()
17 | 
--------------------------------------------------------------------------------
/yolo3_deepsort.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import time
4 | import argparse
5 | import torch
6 | 
7 | from detector import build_detector
8 | from deepsort import build_tracker
9 | from utils.draw_bbox import draw_boxes
10 | from utils.parse_config import parse_config
11 | 
12 | current_path = os.path.dirname(__file__)
13 | 
14 | 
15 | class VideoTracker(object):
16 |     def __init__(self, config, arguments, video_path=None):
17 |         self.cfg = config
18 |         self.args = arguments
19 |         self.video_fps = 60  # default output video FPS is 60
20 |         if video_path is not None:
21 |             self.args.video_path = video_path
22 |         is_use_cuda = self.args.use_cuda and torch.cuda.is_available()
23 |         if not is_use_cuda:
24 |             print("Running programme on CPU")
25 |         else:
26 |             print("Running programme on GPU")
27 | 
28 |         if self.args.display:
29 |             # create the visualization window
30 |             cv2.namedWindow("test", cv2.WINDOW_NORMAL)
31 |             cv2.resizeWindow("test", self.args.show_width, self.args.show_height)
32 | 
33 |         self.vdo = cv2.VideoCapture()
34 |         self.detector = build_detector(self.cfg, use_cuda=is_use_cuda)
35 |         self.deepsort = build_tracker(self.cfg, use_cuda=is_use_cuda)
36 | 
37 |     def __enter__(self):
38 |         self.vdo.open(self.args.video_path)
39 |         self.video_fps = self.vdo.get(cv2.CAP_PROP_FPS)
40 |         print("input video fps", self.video_fps)
41 |         self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
42 |         self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
43 |         if self.args.output_path:
44 |             os.makedirs(os.path.dirname(self.args.output_path) or ".", exist_ok=True)  # create the output folder; the writer below keeps the FPS of the source video
45 |             writer_encoder = cv2.VideoWriter_fourcc(*"XVID") if self.args.output_type == "avi" else cv2.VideoWriter_fourcc(*"X264")
46 |             self.writer = cv2.VideoWriter(self.args.output_path, writer_encoder, self.video_fps, (self.im_width, self.im_height))
47 |         assert self.vdo.isOpened()
48 |         return self
49 | 
50 |     def __exit__(self, exc_type, exc_value, exc_traceback):
51 |         if exc_type:
52 |             print(exc_type, exc_value, exc_traceback)
53 |         self.vdo.release()
54 |         if hasattr(self, "writer"): self.writer.release()  # the writer only exists when output_path is set
55 |         cv2.destroyAllWindows()
56 | 
57 |     def run(self):
58 |         idx_frame = 0  # frame index
59 |         fps_list = []
60 |         while self.vdo.grab():
61 |             # grab frames in a loop
62 |             idx_frame += 1
63 |             start = time.time()
64 |             _, ori_im = self.vdo.retrieve()  # decode and return one frame
65 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
66 | 
67 |             # object detection
68 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
69 |             if bbox_xywh is not None:
70 |                 # keep only detections with class id 0 (person); see coco.names in the YOLO cfg directory
71 |                 mask = (cls_ids == 0)
72 |                 bbox_xywh = bbox_xywh[mask]
73 |                 cls_confidence = cls_confidence[mask]
74 |                 bbox_xywh[:, 2:] *= 1.2  # scale up box width and height to avoid boxes that are too small
75 |                 # tracking
76 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
77 | 
78 |                 # draw the tracking result boxes
79 |                 if len(outputs) > 0:
80 |                     bbox_xyxy = outputs[:, :4]
81 |                     identities = outputs[:, -1]
82 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
83 | 
84 |             end = time.time()
85 |             fps = 1 / (end - start)
86 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, fps))
87 |             fps_list.append(fps)
88 | 
89 |             if self.args.display:
90 |                 cv2.imshow("test", ori_im)
91 |                 cv2.waitKey(1)
92 |             if idx_frame % self.args.frame_interval == 0:
93 |                 # write frames at the configured interval, not every frame
94 |                 if self.args.output_path:
95 |                     self.writer.write(ori_im)
96 | 
97 |         print("average fps: {:.03f}".format(sum(fps_list) / max(idx_frame, 1)))
98 | 
99 |     def run_with_limit(self, frame_limit=20):
100 |         idx_frame = 0
101 |         while self.vdo.grab() and idx_frame < frame_limit * self.args.frame_interval:
102 |             idx_frame += 1
103 |             start = time.time()
104 |             _, ori_im = self.vdo.retrieve()
105 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
106 | 
107 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
108 |             if bbox_xywh is not None:
109 |                 mask = (cls_ids == 0)
110 | 
111 |                 bbox_xywh = bbox_xywh[mask]
112 |                 bbox_xywh[:, 2:] *= 1.2
113 |                 cls_confidence = cls_confidence[mask]
114 | 
115 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
116 | 
117 |                 if len(outputs) > 0:
118 |                     bbox_xyxy = outputs[:, :4]
119 |                     identities = outputs[:, -1]
120 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
121 | 
122 |             end = time.time()
123 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, 1 / (end - start)))
124 | 
125 |             if self.args.display:
126 |                 cv2.imshow("test", ori_im)
127 |                 cv2.waitKey(1)
128 |             if idx_frame % self.args.frame_interval == 0:
129 |                 if self.args.output_path:
130 |                     self.writer.write(ori_im)
131 |                 # file_path = os.path.join(save_path, '{}.png'.format(idx_frame))
132 |                 # result_path.append(os.path.split(file_path)[-1])  # return only the file name, not the full path, to match Django's static file settings
133 |                 # cv2.imwrite(file_path, ori_im)
134 |         return None
135 | 
136 | 
137 | def parse_arguments():
138 |     """
139 |     Parse command-line arguments.
140 |     :return:
141 |     """
142 |     parser = argparse.ArgumentParser()
143 |     parser.add_argument("--video_path", type=str, default='./data/TownCentreXVID.avi')  # source video to track
144 |     parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yml")  # YOLOv3 detector config file
145 |     parser.add_argument("--config_deepsort", type=str, default="./configs/deepsort.yml")  # DeepSORT tracker config file
146 |     parser.add_argument("--frame_interval", type=int, default=1)  # output video frame interval
147 |     parser.add_argument("--show_window", dest="display", action="store_true", default=False)  # show the video in an OpenCV window
148 |     parser.add_argument("--show_width", type=int, default=800)  # display window width
149 |     parser.add_argument("--show_height", type=int, default=600)  # display window height
150 |     parser.add_argument("--output_path", type=str, default="./results/result.avi")  # output video path
151 |     parser.add_argument("--use_cuda", action="store_true", default=True)  # use GPU if available
152 |     parser.add_argument("--output_type", type=str, default="avi")
153 |     return parser.parse_args()
154 | 
155 | 
156 | class Argument(object):
157 |     def __init__(self, video_path):
158 |         """
159 |         Arguments for module-level calls, used instead of the command-line parser above so the tracker can be driven from other code (e.g. the web app).
160 |         :param video_path:
161 |         """
162 |         self.video_path = video_path  # input video path
163 |         self.config_detection = os.path.join(current_path, 'configs/yolov3.yml')  # detector config file
164 |         self.config_deepsort = os.path.join(current_path, 'configs/deepsort.yml')  # DeepSORT config file
165 |         self.display = False  # API mode does not show an OpenCV window by default
166 |         self.frame_interval = 1  # defaults to 1; if the output FPS equals the input FPS the output video then has the same duration as the input
167 |         self.show_width = 800  # display window width
168 |         self.show_height = 600  # display window height
169 |         self.output_path = os.path.join(current_path, 'result/result.avi')  # output video file path
170 |         self.output_type = "avi"
171 |         self.use_cuda = True  # use GPU if available
172 | 
173 | 
174 | def get_config():
175 |     return parse_config()
176 | 
177 | 
178 | if __name__ == "__main__":
179 |     args = parse_arguments()
180 |     cfg = parse_config()
181 |     cfg.merge_from_file(args.config_detection)  # load the detector config
182 |     cfg.merge_from_file(args.config_deepsort)  # load the DeepSORT config
183 | 
184 |     with VideoTracker(cfg, args) as vdo_trk:
185 |         vdo_trk.run()
186 | 
--------------------------------------------------------------------------------
/yolo3_deepsort_camera.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Zhou Chen
3 | Date: 2020/6/4
4 | Desc: module for real-time tracking from the camera
5 | """
6 | import os
7 | import cv2
8 | import time
9 | import argparse
10 | import torch
11 | 
12 | from detector import build_detector
13 | from deepsort import build_tracker
14 | from utils.draw_bbox import draw_boxes
15 | from utils.parse_config import parse_config
16 | 
17 | current_path = os.path.dirname(__file__)
18 | 
19 | 
20 | class VideoTracker(object):
21 |     def __init__(self, config, arguments, video_path=None):
22 |         self.cfg = config
23 |         self.args = arguments
24 |         self.video_fps = 60  # default output video FPS is 60
25 |         if video_path is not None:
26 |             self.args.video_path = video_path
27 |         is_use_cuda = self.args.use_cuda and torch.cuda.is_available()
28 |         if not is_use_cuda:
29 |             print("Running programme on CPU")
30 |         else:
31 |             print("Running programme on GPU")
32 | 
33 |         if self.args.display:
34 |             # create the visualization window
35 |             cv2.namedWindow("test", cv2.WINDOW_NORMAL)
36 |             cv2.resizeWindow("test", self.args.show_width, self.args.show_height)
37 | 
38 |         self.camera = cv2.VideoCapture(getattr(self.args, "camera_id", 0))  # open the camera selected by --camera_id (defaults to device 0)
39 |         self.video_width, self.video_height = self.args.show_width, self.args.show_height
40 |         self.detector = build_detector(self.cfg, use_cuda=is_use_cuda)
41 |         self.deepsort = build_tracker(self.cfg, use_cuda=is_use_cuda)
42 | 
43 |     def __enter__(self):
44 |         self.video_fps = self.camera.get(cv2.CAP_PROP_FPS) or self.video_fps  # some webcams report 0 FPS; fall back to the default
45 |         print("camera capture fps:", self.video_fps)
46 |         if self.args.output_path:
47 |             os.makedirs(os.path.dirname(self.args.output_path) or ".", exist_ok=True)  # create the output folder; the writer below keeps the capture FPS
48 |             writer_encoder = cv2.VideoWriter_fourcc(*"XVID")
49 |             self.writer = cv2.VideoWriter(self.args.output_path, writer_encoder, self.video_fps, (self.video_width, self.video_height))
50 |         assert self.camera.isOpened()
51 |         return self
52 | 
53 |     def __exit__(self, exc_type, exc_value, exc_traceback):
54 |         if exc_type:
55 |             print(exc_type, exc_value, exc_traceback)
56 |         self.camera.release()
57 |         if hasattr(self, "writer"): self.writer.release()  # the writer only exists when output_path is set
58 |         cv2.destroyAllWindows()
59 | 
60 |     def run(self):
61 |         idx_frame = 0  # frame index
62 |         fps_list = []
63 |         while self.camera.isOpened():
64 |             # grab frames in a loop
65 |             idx_frame += 1
66 |             start = time.time()
67 |             _, ori_im = self.camera.read()  # decode and return one frame
68 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
69 |             # object detection
70 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
71 |             if bbox_xywh is not None:
72 |                 # keep only detections with class id 0 (person); see coco.names in the YOLO cfg directory
73 |                 mask = (cls_ids == 0)
74 |                 bbox_xywh = bbox_xywh[mask]
75 |                 cls_confidence = cls_confidence[mask]
76 |                 bbox_xywh[:, 2:] *= 1.2  # scale up box width and height to avoid boxes that are too small
77 |                 # tracking
78 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
79 | 
80 |                 # draw the tracking result boxes
81 |                 if len(outputs) > 0:
82 |                     bbox_xyxy = outputs[:, :4]
83 |                     identities = outputs[:, -1]
84 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
85 | 
86 |             end = time.time()
87 |             fps = 1 / (end - start)
88 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, fps))
89 |             fps_list.append(fps)
90 | 
91 |             if self.args.display:
92 |                 cv2.imshow("test", ori_im)
93 |                 cv2.waitKey(1)
94 |             if idx_frame % self.args.frame_interval == 0:
95 |                 # write frames at the configured interval, not every frame
96 |                 if self.args.output_path:
97 |                     self.writer.write(ori_im)
98 | 
99 |         print("average fps: {:.03f}".format(sum(fps_list) / max(idx_frame, 1)))
100 | 
101 | 
102 | def parse_arguments():
103 |     """
104 |     Parse command-line arguments.
105 |     :return:
106 |     """
107 |     parser = argparse.ArgumentParser()
108 |     parser.add_argument("--camera_id", type=int, default=0)  # camera device id
109 |     parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yml")
110 |     parser.add_argument("--config_deepsort", type=str, default="./configs/deepsort.yml")
111 |     parser.add_argument("--frame_interval", type=int, default=1)  # output video frame interval
112 |     parser.add_argument("--show_window", dest="display", action="store_true", default=True)  # show the video in an OpenCV window (on by default for the camera demo)
113 |     parser.add_argument("--show_width", type=int, default=800)  # display window width
114 |     parser.add_argument("--show_height", type=int, default=600)  # display window height
115 |     parser.add_argument("--output_path", type=str, default="./results/result.avi")  # output video path
116 |     parser.add_argument("--use_cuda", action="store_true", default=True)  # use GPU if available
117 |     return parser.parse_args()
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     args = parse_arguments()
122 |     cfg = parse_config()
123 |     cfg.merge_from_file(args.config_detection)  # load the detector config
124 |     cfg.merge_from_file(args.config_deepsort)  # load the DeepSORT config
125 | 
126 |     with VideoTracker(cfg, args) as vdo_trk:
127 |         vdo_trk.run()
128 | 
129 | 
--------------------------------------------------------------------------------
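Usage note: the snippet below is a minimal sketch of driving the tracker from Python through the Argument class, mirroring what web/web/views.py does; the input path is a placeholder you would replace with your own video file.

import yolo3_deepsort

video_path = "./data/TownCentreXVID.avi"    # placeholder: any local video file
args = yolo3_deepsort.Argument(video_path)  # module-level arguments instead of argparse
cfg = yolo3_deepsort.get_config()
cfg.merge_from_file(args.config_detection)  # YOLOv3 settings from configs/yolov3.yml
cfg.merge_from_file(args.config_deepsort)   # DeepSORT settings from configs/deepsort.yml
with yolo3_deepsort.VideoTracker(cfg, args, video_path) as vdo_trk:
    vdo_trk.run_with_limit(300)             # process at most 300 frames, as the web view does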