├── .gitignore
├── README-zh.md
├── README.md
├── assets
    ├── algorithm.vsdx
    ├── cascade.png
    ├── loss.png
    ├── net.png
    ├── net_arch.vsdx
    ├── web.png
    ├── web_rst.png
    └── 算法流程.png
├── configs
    ├── deepsort.yml
    └── yolov3.yml
├── data
    └── tips.txt
├── deepsort
    ├── __init__.py
    ├── deep
    │   ├── __init__.py
    │   ├── false.png
    │   ├── feature_extractor.py
    │   ├── loss.png
    │   ├── model.py
    │   ├── train.png
    │   ├── train.py
    │   ├── true.png
    │   └── visualize.py
    ├── deep_sort.py
    └── sort
    │   ├── README.md
    │   ├── __init__.py
    │   ├── detection.py
    │   ├── iou_matching.py
    │   ├── kalman_filter.py
    │   ├── linear_assignment.py
    │   ├── nn_matching.py
    │   ├── preprocessing.py
    │   ├── track.py
    │   └── tracker.py
├── detector
    ├── FasterRCNN
    │   └── tips.txt
    ├── YOLO3
    │   ├── __init__.py
    │   ├── cfg.py
    │   ├── cfg
    │   │   ├── coco.data
    │   │   ├── coco.names
    │   │   ├── darknet19_448.cfg
    │   │   ├── tiny-yolo-voc.cfg
    │   │   ├── tiny-yolo.cfg
    │   │   ├── voc.data
    │   │   ├── voc.names
    │   │   ├── voc_gaotie.data
    │   │   ├── yolo-voc.cfg
    │   │   ├── yolo.cfg
    │   │   ├── yolo_v3.cfg
    │   │   └── yolov3-tiny.cfg
    │   ├── darknet.py
    │   ├── detect.py
    │   ├── detector.py
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── nms.py
    │   │   └── python_nms.py
    │   ├── region_layer.py
    │   ├── weight
    │   │   └── tips.txt
    │   ├── yolo_layer.py
    │   └── yolo_utils.py
    └── __init__.py
├── requirements.txt
├── utils
    ├── __init__.py
    ├── dataset_reconstruct.py
    ├── dataset_split.py
    ├── draw_bbox.py
    ├── format_factory.py
    └── parse_config.py
├── web
    ├── README.md
    ├── __init__.py
    ├── db.sqlite3
    ├── manage.py
    ├── static
    │   ├── css
    │   │   └── video-js.min.css
    │   ├── images
    │   │   └── bg.png
    │   └── js
    │   │   └── video.min.js
    ├── templates
    │   ├── show_images.html
    │   ├── show_video.html
    │   └── upload.html
    └── web
    │   ├── __init__.py
    │   ├── asgi.py
    │   ├── settings.py
    │   ├── urls.py
    │   ├── views.py
    │   └── wsgi.py
├── yolo3_deepsort.py
└── yolo3_deepsort_camera.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # Folders
2 | __pycache__/
3 | build/
4 | *.egg-info
5 | .idea/
6 | media/
7 | web/static/images/
8 | recording/
9 | 
10 | # Files
11 | *.weights
12 | *.t7
13 | *.mp4
14 | *.avi
15 | *.so
16 | *.zip
17 | *.jpg
18 | *.gif
--------------------------------------------------------------------------------
/README-zh.md:
--------------------------------------------------------------------------------
1 | # 基于pytorch实现DeepSORT多目标跟踪
2 | > 该仓库参考 [ZQPei的项目](https://github.com/ZQPei/deep_sort_pytorch),我在其基础上进行了一些优化。
3 | 
4 | ## 环境配置
5 | 基于Python3.6并在虚拟环境下安装如下几个核心包即可,具体见[requirements](./requirements.txt)文件。
6 | 
7 | - pytorch>=1.0
8 | - numpy
9 | - scipy
10 | 
11 | ## 运行脚本
12 | 使用如下命令对视频进行跟踪。
13 | 
14 | `python yolo3_deepsort.py --video_path ../data/TownCenter.avi`
15 | 
16 | 使用如下命令,打开默认摄像头,实时跟踪。
17 | 
18 | `python yolo3_deepsort_camera.py`
19 | 
20 | 
21 | ## 跟踪结果
22 | 在示例视频上跟踪效果如下图。
23 | 
24 | ![](./assets/demo.gif)
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DeepSORT multi-object tracking implementation in PyTorch
2 | > This project refers to [ZQPei's repo](https://github.com/ZQPei/deep_sort_pytorch); I made some optimizations on top of it.
3 | 
4 | 
5 | ## environment
6 | Install the following core packages (Python 3.6, virtual environment recommended); see [requirements](./requirements.txt) for details.
7 | 
8 | - pytorch>=1.0
9 | - numpy
10 | - scipy
11 | 
12 | 
13 | ## run demo
14 | Use the following command in a terminal to run tracking on a video file.
15 | 
16 | `python yolo3_deepsort.py --video_path ../data/TownCenter.avi`
17 | 
18 | Use the following command in a terminal to run real-time tracking on the default camera.
19 | 
20 | `python yolo3_deepsort_camera.py`
21 | 
22 | ## results
23 | A sample tracking result on the demo video is shown below.
24 | 
25 | ![](./assets/demo.gif)
--------------------------------------------------------------------------------
/assets/algorithm.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/algorithm.vsdx
--------------------------------------------------------------------------------
/assets/cascade.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/cascade.png
--------------------------------------------------------------------------------
/assets/loss.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/loss.png
--------------------------------------------------------------------------------
/assets/net.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/net.png
--------------------------------------------------------------------------------
/assets/net_arch.vsdx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/net_arch.vsdx
--------------------------------------------------------------------------------
/assets/web.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/web.png
--------------------------------------------------------------------------------
/assets/web_rst.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/web_rst.png
--------------------------------------------------------------------------------
/assets/算法流程.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/assets/算法流程.png
--------------------------------------------------------------------------------
/configs/deepsort.yml:
--------------------------------------------------------------------------------
1 | DEEPSORT:
2 |   # 预训练reid模型参数文件
3 |   REID_CKPT: "./deepsort/deep/checkpoint/ckpt1.t7"
4 |   # 最大余弦距离
5 |   MAX_DIST: 0.2
6 |   # 确认检测结果最小置信度
7 |   MIN_CONFIDENCE: 0.3
8 |   # NMS最大IOU
9 |   NMS_MAX_OVERLAP: 0.5
10 |   # IOU匹配时最大IOU距离
11 |   MAX_IOU_DISTANCE: 0.7
12 |   # 最大生命周期
13 |   MAX_AGE: 50
14 |   # 确认轨迹所需帧数
15 |   N_INIT: 3
16 |   # 临时特征向量容量,可减少以保证运行速度
17 |   NN_BUDGET: 100
18 | 
--------------------------------------------------------------------------------
/configs/yolov3.yml:
--------------------------------------------------------------------------------
1 | YOLOV3:
2 |   CFG: "./detector/YOLO3/cfg/yolo_v3.cfg"
3 |   WEIGHT: "./detector/YOLO3/weight/yolov3.weights"
4 |   CLASS_NAMES:
"./detector/YOLO3/cfg/coco.names" 5 | SCORE_THRESH: 0.5 6 | NMS_THRESH: 0.4 7 | -------------------------------------------------------------------------------- /data/tips.txt: -------------------------------------------------------------------------------- 1 | demo videos folder -------------------------------------------------------------------------------- /deepsort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(cfg, use_cuda): 8 | return DeepSort(cfg.DEEPSORT.REID_CKPT, 9 | max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, 10 | nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, 11 | max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deepsort/deep/__init__.py: -------------------------------------------------------------------------------- 1 | from .feature_extractor import Net -------------------------------------------------------------------------------- /deepsort/deep/false.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/false.png -------------------------------------------------------------------------------- /deepsort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .model import Net 7 | 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True, num_classes=751) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | print("Loaded weights from {}.".format(model_path)) 16 | self.net.to(self.device) 17 | self.size = (64, 128) 18 | self.norm = transforms.Compose([ 19 | transforms.ToTensor(), 20 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 21 | ]) 22 | 23 | def _preprocess(self, im_crops): 24 | """ 25 | 特征提取器的图像预处理 26 | 归一到0-1 27 | 调整图像大小 28 | 图像标准化 29 | Torch张量化 30 | :param im_crops: 一个batch的RGB图像(单图需要放在列表中) 31 | :return: 32 | """ 33 | def _resize(im, size): 34 | return cv2.resize(im.astype(np.float32)/255., size) 35 | 36 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 37 | return im_batch 38 | 39 | def __call__(self, im_crops): 40 | im_batch = self._preprocess(im_crops) 41 | with torch.no_grad(): 42 | im_batch = im_batch.to(self.device) 43 | features = self.net(im_batch) 44 | return features.cpu().numpy() 45 | 46 | 47 | def test(): 48 | def cosine(a, b, data_is_normalized=False): 49 | if not data_is_normalized: 50 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 51 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 52 | return np.dot(a, b.T) 53 | 54 | img1 = cv2.cvtColor(cv2.resize(cv2.imread("1.jpg"), (64, 128)), cv2.COLOR_BGR2RGB) 55 | img2 = cv2.cvtColor(cv2.resize(cv2.imread("2.jpg"), (64, 128)), 
cv2.COLOR_BGR2RGB) 56 | img3 = cv2.cvtColor(cv2.resize(cv2.imread("3.jpg"), (64, 128)), cv2.COLOR_BGR2RGB) 57 | imgs = [img1, img2, img3] 58 | extractor = Extractor("checkpoint/ckpt1.t7") 59 | feature = extractor(imgs) 60 | a = feature[0] 61 | b = feature[1] 62 | c = feature[2] 63 | import matplotlib.pyplot as plt 64 | fig = plt.figure() 65 | plt.subplot(1, 2, 1) 66 | plt.imshow(img1) 67 | plt.title(" ") 68 | plt.subplot(1, 2, 2) 69 | plt.imshow(img2) 70 | fig.suptitle("Cosine similarity:" + str(cosine(a.reshape(1, -1), b.reshape(1, -1), True)[0][0]) + "\n") 71 | plt.title(" ") 72 | plt.savefig("true.png") 73 | plt.show() 74 | 75 | import matplotlib.pyplot as plt 76 | fig = plt.figure() 77 | plt.subplot(1, 2, 1) 78 | plt.imshow(img1) 79 | plt.title(" ") 80 | plt.subplot(1, 2, 2) 81 | plt.imshow(img3) 82 | fig.suptitle("Cosine similarity:" + str(cosine(a.reshape(1, -1), c.reshape(1, -1), True)[0][0]) + "\n") 83 | plt.title(" ") 84 | plt.savefig("false.png") 85 | plt.show() 86 | 87 | 88 | if __name__ == '__main__': 89 | # 测试提取器 90 | test() 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /deepsort/deep/loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/loss.png -------------------------------------------------------------------------------- /deepsort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, in_channels, out_channels, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=2, padding=1, bias=False) 12 | else: 13 | self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=1, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(out_channels) 15 | self.relu = nn.ReLU(True) 16 | self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False) 17 | self.bn2 = nn.BatchNorm2d(out_channels) 18 | if is_downsample: 19 | self.downsample = nn.Sequential( 20 | nn.Conv2d(in_channels, out_channels, 1, stride=2, bias=False), 21 | nn.BatchNorm2d(out_channels) 22 | ) 23 | elif in_channels != out_channels: 24 | self.downsample = nn.Sequential( 25 | nn.Conv2d(in_channels, out_channels, 1, stride=1, bias=False), 26 | nn.BatchNorm2d(out_channels) 27 | ) 28 | self.is_downsample = True 29 | 30 | def forward(self, x): 31 | y = self.conv1(x) 32 | y = self.bn1(y) 33 | y = self.relu(y) 34 | y = self.conv2(y) 35 | y = self.bn2(y) 36 | if self.is_downsample: 37 | x = self.downsample(x) 38 | return F.relu(x.add(y), True) # 残差连接 39 | 40 | 41 | def make_layers(in_channels, out_channels, repeat_times, is_downsample=False): 42 | blocks = [] 43 | for i in range(repeat_times): 44 | if i == 0: 45 | blocks += [BasicBlock(in_channels, out_channels, is_downsample=is_downsample), ] 46 | else: 47 | blocks += [BasicBlock(out_channels, out_channels), ] 48 | return nn.Sequential(*blocks) 49 | 50 | 51 | class Net(nn.Module): 52 | def __init__(self, num_classes=1261, reid=False): 53 | """ 54 | 55 | :param num_classes: 分类器层输出的类别数目,Mars数据集训练集加测试集共1261类 56 | :param reid: 是否为reid模式,若为True,直接返回特征向量而不做分类 57 | """ 58 | super(Net, self).__init__() 59 | # 3 128 64 60 | self.conv = 
nn.Sequential( 61 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 62 | nn.BatchNorm2d(64), 63 | nn.ReLU(inplace=True), 64 | nn.MaxPool2d(3, 2, padding=1), 65 | ) 66 | # 32 64 32 67 | self.layer1 = make_layers(64, 64, 2, False) 68 | # 32 64 32 69 | self.layer2 = make_layers(64, 128, 2, True) 70 | # 64 32 16 71 | self.layer3 = make_layers(128, 256, 2, True) 72 | # 128 16 8 73 | self.layer4 = make_layers(256, 512, 2, True) 74 | # 256 8 4 75 | self.avgpool = nn.AvgPool2d((8, 4), 1) 76 | # 256 1 1 77 | self.reid = reid 78 | self.classifier = nn.Sequential( 79 | nn.Linear(512, 256), 80 | nn.BatchNorm1d(256), 81 | nn.ReLU(inplace=True), 82 | nn.Dropout(), 83 | nn.Linear(256, num_classes), 84 | ) 85 | 86 | def forward(self, x): 87 | x = self.conv(x) 88 | x = self.layer1(x) 89 | x = self.layer2(x) 90 | x = self.layer3(x) 91 | x = self.layer4(x) 92 | x = self.avgpool(x) 93 | x = x.view(x.size(0), -1) 94 | # 256 95 | if self.reid: 96 | x = x / x.norm(p=2, dim=1, keepdim=True) # 张量单位化 97 | return x 98 | # 分类器 99 | x = self.classifier(x) 100 | return x 101 | 102 | 103 | if __name__ == '__main__': 104 | net = Net(reid=True) 105 | print(net) 106 | x = torch.randn(4, 3, 128, 64) 107 | y = net(x) 108 | print(y.shape) 109 | -------------------------------------------------------------------------------- /deepsort/deep/train.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/train.png -------------------------------------------------------------------------------- /deepsort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import matplotlib.pyplot as plt 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import torchvision 8 | 9 | from model import Net 10 | 11 | # 命令行参数配置 12 | parser = argparse.ArgumentParser(description="Train on Mars") 13 | parser.add_argument("--data-dir", default='/SISDC_GPFS/Home_SE/jiangm-jnu/xiaf-jnu/zhouchen/dataset/MARS-generated/', type=str) # 修改为自己的数据集目录 14 | parser.add_argument("--gpu-id", default=0, type=int) 15 | parser.add_argument("--lr", default=0.1, type=float) # 初始学习率 16 | parser.add_argument("--interval", '-i', default=20, type=int) # 日志输出间隔 17 | parser.add_argument('--resume', '-r', action='store_true', default=False) # 使用预训练模型 18 | args = parser.parse_args() 19 | 20 | # 确定训练设备 21 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu" 22 | 23 | if torch.cuda.is_available(): 24 | cudnn.benchmark = True # 对固定的网络结构优化 25 | 26 | # 数据载入 27 | root = args.data_dir 28 | train_dir = os.path.join(root, "bbox_train") 29 | test_dir = os.path.join(root, "bbox_test") 30 | # 图像预处理 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.Resize((128, 64)), # 如果采用Market数据集这一步可以删去,Mars必须要这一步 33 | torchvision.transforms.RandomCrop((128, 64), padding=4), 34 | torchvision.transforms.RandomHorizontalFlip(), 35 | torchvision.transforms.ToTensor(), 36 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 37 | ]) 38 | transform_test = torchvision.transforms.Compose([ 39 | torchvision.transforms.Resize((128, 64)), 40 | torchvision.transforms.ToTensor(), 41 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 42 | ]) 43 | 44 | trainloader = torch.utils.data.DataLoader( 45 | torchvision.datasets.ImageFolder(train_dir, 
transform=transform_train), 46 | batch_size=128, shuffle=True 47 | ) 48 | 49 | testloader = torch.utils.data.DataLoader( 50 | torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 51 | batch_size=128, shuffle=True 52 | ) 53 | 54 | num_classes = len(trainloader.dataset.classes) 55 | 56 | # net definition 57 | start_epoch = 0 58 | net = Net(num_classes=num_classes) 59 | 60 | if args.resume: 61 | # 是否使用预训练参数 62 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 63 | print('Loaded pretrained weights from checkpoint file') 64 | checkpoint = torch.load("./checkpoint/ckpt.t7") # 该字典含有net_dict,acc,epoch三个键 65 | net_dict = checkpoint['net_dict'] 66 | net.load_state_dict(net_dict) 67 | 68 | net.to(device) 69 | 70 | # 使用交叉熵和SGD 71 | criterion = torch.nn.CrossEntropyLoss() 72 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 73 | best_acc = 0.0 74 | 75 | 76 | # train function for each epoch 77 | def train(epoch): 78 | print("Epoch{}".format(epoch + 1)) 79 | print("Training...") 80 | net.train() 81 | training_loss = 0. 82 | train_loss = 0. 83 | correct = 0 84 | total = 0 85 | interval = args.interval 86 | start = time.time() 87 | for idx, (inputs, labels) in enumerate(trainloader): 88 | # 前向传播 89 | inputs, labels = inputs.to(device), labels.to(device) 90 | outputs = net(inputs) 91 | loss = criterion(outputs, labels) 92 | 93 | # 反向传播 94 | optimizer.zero_grad() 95 | loss.backward() 96 | optimizer.step() 97 | 98 | # 计算指标 99 | training_loss += loss.item() 100 | train_loss += loss.item() 101 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 102 | total += labels.size(0) 103 | 104 | if (idx + 1) % interval == 0: 105 | # 固定step输出一次信息 106 | end = time.time() 107 | print("[Progress:{:.1f}%] time:{:.2f}s Loss:{:.5f} Acc:{:.3f}%".format( 108 | 100. * (idx + 1) / len(trainloader), end - start, training_loss / interval, 109 | 100. * correct / total 110 | )) 111 | training_loss = 0.0 112 | start = time.time() 113 | 114 | return train_loss / len(trainloader), 1. - correct / total 115 | 116 | 117 | def test(epoch): 118 | global best_acc 119 | print("Epoch{}".format(epoch + 1)) 120 | print("Testing...") 121 | net.eval() 122 | test_loss = 0. 123 | correct = 0 124 | total = 0 125 | start = time.time() 126 | with torch.no_grad(): 127 | for idx, (inputs, labels) in enumerate(testloader): 128 | inputs, labels = inputs.to(device), labels.to(device) 129 | outputs = net(inputs) 130 | loss = criterion(outputs, labels) 131 | 132 | test_loss += loss.item() 133 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 134 | total += labels.size(0) 135 | 136 | end = time.time() 137 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 138 | 100. * (idx + 1) / len(testloader), end - start, test_loss / len(testloader), correct, total, 139 | 100. * correct / total 140 | )) 141 | 142 | # 保存训练参数 143 | acc = 100. * correct / total 144 | if acc > best_acc: 145 | # 始终保留最好的参数,如果过拟合,则不保留参数 146 | best_acc = acc 147 | print("Saving parameters to checkpoint/ckpt.t7") 148 | checkpoint = { 149 | 'net_dict': net.state_dict(), 150 | } 151 | if not os.path.isdir('checkpoint'): 152 | os.mkdir('checkpoint') 153 | torch.save(checkpoint, './checkpoint/ckpt.t7') 154 | 155 | return test_loss / len(testloader), 1. 
- correct / total 156 | 157 | 158 | # 绘制训练曲线 159 | x_epoch = [] 160 | record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []} 161 | fig = plt.figure(figsize=(18, 6)) 162 | ax0 = fig.add_subplot(121, title="loss") 163 | ax1 = fig.add_subplot(122, title="err") 164 | 165 | 166 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 167 | global record 168 | record['train_loss'].append(train_loss) 169 | record['train_err'].append(train_err) 170 | record['test_loss'].append(test_loss) 171 | record['test_err'].append(test_err) 172 | 173 | x_epoch.append(epoch) 174 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='training') 175 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='validation') 176 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='training') 177 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='validation') 178 | if epoch == 0: 179 | ax0.legend() 180 | ax1.legend() 181 | fig.savefig("train.png") 182 | 183 | 184 | def lr_decay(): 185 | # 设置学习率衰减 186 | global optimizer 187 | for params in optimizer.param_groups: 188 | params['lr'] *= 0.1 189 | lr = params['lr'] 190 | print("Learning rate adjusted to {}".format(lr)) 191 | 192 | 193 | def main(): 194 | # 训练50轮 195 | for epoch in range(50): 196 | train_loss, train_err = train(epoch) 197 | test_loss, test_err = test(epoch) 198 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 199 | if (epoch + 1) % 20 == 0: 200 | lr_decay() 201 | 202 | 203 | if __name__ == '__main__': 204 | main() 205 | -------------------------------------------------------------------------------- /deepsort/deep/true.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/deep/true.png -------------------------------------------------------------------------------- /deepsort/deep/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/5/21 4 | Desc: desc 5 | """ 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def parse_txt(filepath="log_train.txt"): 10 | loss_list = [] 11 | with open(filepath, 'r', encoding="utf8") as f: 12 | line = f.readline().strip() 13 | while line: 14 | loss = float(line.split(" ")[2].split(":")[-1]) 15 | loss_list.append(loss) 16 | line = f.readline().strip() 17 | return loss_list 18 | 19 | 20 | def draw_his(loss): 21 | plt.figure() 22 | plt.plot(list(range(len(loss))), loss) 23 | plt.savefig('loss.png') 24 | plt.show() 25 | 26 | 27 | if __name__ == '__main__': 28 | rst = parse_txt() 29 | draw_his(rst) -------------------------------------------------------------------------------- /deepsort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .deep.feature_extractor import Extractor 5 | from .sort.nn_matching import NearestNeighborDistanceMetric 6 | from .sort.preprocessing import non_max_suppression 7 | from .sort.detection import Detection 8 | from .sort.tracker import Tracker 9 | 10 | __all__ = ['DeepSort'] 11 | 12 | 13 | class DeepSort(object): 14 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, 15 | max_age=70, n_init=3, nn_budget=100, use_cuda=True): 16 | self.min_confidence = min_confidence 17 | self.nms_max_overlap = nms_max_overlap 18 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 19 | 
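        # 外观度量使用最近邻余弦距离:max_dist 为匹配阈值,nn_budget 限制每个目标保留的特征数量(见 sort/nn_matching.py)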
metric = NearestNeighborDistanceMetric("cosine", max_dist, nn_budget) 20 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 21 | self.height = None 22 | self.width = None 23 | 24 | def update(self, bbox_xywh, confidences, ori_img): 25 | self.height, self.width = ori_img.shape[:2] 26 | features = self._get_features(bbox_xywh, ori_img) # 提取深度特征 27 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 28 | detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate(confidences) if 29 | conf > self.min_confidence] # 只保留大于最小置信度的检测框 30 | 31 | # 使用非极大抑制消除部分bbox 32 | boxes = np.array([d.tlwh for d in detections]) 33 | scores = np.array([d.confidence for d in detections]) 34 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 35 | detections = [detections[i] for i in indices] 36 | 37 | # update tracker 38 | self.tracker.predict() 39 | self.tracker.update(detections) 40 | 41 | # output bbox identities 42 | outputs = [] 43 | for track in self.tracker.tracks: 44 | if not track.is_confirmed() or track.time_since_update > 1: 45 | continue 46 | box = track.to_tlwh() 47 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 48 | track_id = track.track_id 49 | outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 50 | if len(outputs) > 0: 51 | outputs = np.stack(outputs, axis=0) 52 | return outputs 53 | 54 | @staticmethod 55 | def _xywh_to_tlwh(bbox_xywh): 56 | """ 57 | 转化(xc, yc, w, h)为(xtl,ytl,w, h) 58 | """ 59 | if isinstance(bbox_xywh, np.ndarray): 60 | bbox_tlwh = bbox_xywh.copy() 61 | elif isinstance(bbox_xywh, torch.Tensor): 62 | bbox_tlwh = bbox_xywh.clone() 63 | else: 64 | bbox_tlwh = None 65 | bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 66 | bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 
67 | return bbox_tlwh 68 | 69 | def _xywh_to_xyxy(self, bbox_xywh): 70 | x, y, w, h = bbox_xywh 71 | x1 = max(int(x - w / 2), 0) 72 | x2 = min(int(x + w / 2), self.width - 1) 73 | y1 = max(int(y - h / 2), 0) 74 | y2 = min(int(y + h / 2), self.height - 1) 75 | return x1, y1, x2, y2 76 | 77 | def _tlwh_to_xyxy(self, bbox_tlwh): 78 | x, y, w, h = bbox_tlwh 79 | x1 = max(int(x), 0) 80 | x2 = min(int(x + w), self.width - 1) 81 | y1 = max(int(y), 0) 82 | y2 = min(int(y + h), self.height - 1) 83 | return x1, y1, x2, y2 84 | 85 | def _get_features(self, bbox_xywh, ori_img): 86 | """ 87 | 获得图像的检测块的深度特征 88 | """ 89 | im_crops = [] 90 | for box in bbox_xywh: 91 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 92 | im = ori_img[y1:y2, x1:x2] 93 | im_crops.append(im) 94 | if im_crops: 95 | features = self.extractor(im_crops) 96 | else: 97 | features = np.array([]) 98 | return features 99 | -------------------------------------------------------------------------------- /deepsort/sort/README.md: -------------------------------------------------------------------------------- 1 | # SORT跟踪算法实现 2 | 3 | 本部分代码参考官方代码的实现 -------------------------------------------------------------------------------- /deepsort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/deepsort/sort/__init__.py -------------------------------------------------------------------------------- /deepsort/sort/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Detection(object): 5 | """ 6 | 检测基类,不论使用什么检测框架,最后由该类处理 7 | """ 8 | 9 | def __init__(self, tlwh, confidence, feature): 10 | """ 11 | 12 | :param tlwh: bbox (x, y, w, h) 13 | :param confidence: 置信度 14 | :param feature: 特征向量 15 | """ 16 | self.tlwh = np.asarray(tlwh, dtype=np.float) 17 | self.confidence = float(confidence) 18 | self.feature = np.asarray(feature, dtype=np.float32) 19 | 20 | def to_tlbr(self): 21 | """ 22 | 转换bbox为(top left bottom right)的格式即(minx miny maxx maxy)_ 23 | :return: 24 | """ 25 | ret = self.tlwh.copy() 26 | ret[2:] += ret[:2] 27 | return ret 28 | 29 | def to_xyah(self): 30 | """ 31 | 转换bbox为(center x, center y, aspect ration, height) 32 | :return: 33 | """ 34 | ret = self.tlwh.copy() 35 | ret[:2] += ret[2:] / 2 36 | ret[2] /= ret[3] 37 | return ret 38 | -------------------------------------------------------------------------------- /deepsort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . 
import linear_assignment 3 | 4 | 5 | def iou(bbox, candidates): 6 | """ 7 | 计算IOU 8 | :param bbox: bbox like (top left x,top left y, width, height) 9 | :param candidates:候选跟踪框矩阵(每行一个) 格式同bbox 10 | :return: 11 | """ 12 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 13 | candidates_tl = candidates[:, :2] 14 | candidates_br = candidates[:, :2] + candidates[:, 2:] 15 | 16 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 17 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 18 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 19 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 20 | wh = np.maximum(0., br - tl) 21 | 22 | area_intersection = wh.prod(axis=1) 23 | area_bbox = bbox[2:].prod() 24 | area_candidates = candidates[:, 2:].prod(axis=1) 25 | return area_intersection / (area_bbox + area_candidates - area_intersection) 26 | 27 | 28 | def iou_cost(tracks, detections, track_indices=None, 29 | detection_indices=None): 30 | """ 31 | IOU距离 32 | :param tracks: 一个列表的轨迹 33 | :param detections : 一个列表的检测 34 | :param track_indices : 一个该被匹配的轨迹下标列表 35 | :param detection_indices : 一个该被匹配的检测下标列表 36 | :return: 返回代价矩阵,维度(len(track_indices), len(detection_indices)) 37 | 每个元素(i,j)1 - iou(tracks[track_indices[i]], detections[detection_indices[j]]) 38 | """ 39 | if track_indices is None: 40 | track_indices = np.arange(len(tracks)) 41 | if detection_indices is None: 42 | detection_indices = np.arange(len(detections)) 43 | 44 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 45 | for row, track_idx in enumerate(track_indices): 46 | if tracks[track_idx].time_since_update > 1: 47 | cost_matrix[row, :] = linear_assignment.INFTY_COST 48 | continue 49 | 50 | bbox = tracks[track_idx].to_tlwh() 51 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 52 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 53 | return cost_matrix 54 | -------------------------------------------------------------------------------- /deepsort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | """ 2 | 本模块参考SORT论文源码实现的卡尔曼滤波 3 | """ 4 | import numpy as np 5 | import scipy.linalg 6 | 7 | # 具有N个自由度的卡方分布的0.95分位数的表,取自matlab中chi2inv函数,作为Mahalanobis阈值 8 | chi2inv95 = { 9 | 1: 3.8415, 10 | 2: 5.9915, 11 | 3: 7.8147, 12 | 4: 9.4877, 13 | 5: 11.070, 14 | 6: 12.592, 15 | 7: 14.067, 16 | 8: 15.507, 17 | 9: 16.919} 18 | 19 | 20 | class KalmanFilter(object): 21 | """ 22 | 图像空间预测bbox的卡尔曼滤波 23 | 8维空间 24 | 目标移动按照匀速模型,bbox位置作为状态空间的直接观测(线性观测模型)。 25 | """ 26 | 27 | def __init__(self): 28 | ndim, dt = 4, 1. 29 | 30 | # 参数初始化 31 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 32 | for i in range(ndim): 33 | self._motion_mat[i, ndim + i] = dt 34 | self._update_mat = np.eye(ndim, 2 * ndim) 35 | 36 | # 模型不确定性控制权重 37 | self._std_weight_position = 1. / 20 38 | self._std_weight_velocity = 1. 
/ 160 39 | 40 | def initiate(self, measurement): 41 | """ 42 | 测量中创建跟踪,本项目指的是从检测结果中创建 43 | 初始化均值和协方差 44 | 检测格式为(cx,cy,a,h) 45 | 返回均值(8维)和协方差(8*8维),未观测到的速度初始化为0 46 | """ 47 | mean_pos = measurement 48 | mean_vel = np.zeros_like(mean_pos) 49 | mean = np.r_[mean_pos, mean_vel] 50 | 51 | std = [ 52 | 2 * self._std_weight_position * measurement[3], 53 | 2 * self._std_weight_position * measurement[3], 54 | 1e-2, 55 | 2 * self._std_weight_position * measurement[3], 56 | 10 * self._std_weight_velocity * measurement[3], 57 | 10 * self._std_weight_velocity * measurement[3], 58 | 1e-5, 59 | 10 * self._std_weight_velocity * measurement[3]] 60 | covariance = np.diag(np.square(std)) 61 | return mean, covariance 62 | 63 | def predict(self, mean, covariance): 64 | """ 65 | 66 | Parameters 67 | ---------- 68 | mean 上一帧目标状态的均值向量(8维) 69 | covariance 上一帧目标状态的协方差矩阵(8*8维) 70 | 71 | Returns 72 | 预测状态的相应均值和协方差 73 | ------- 74 | 75 | """ 76 | std_pos = [ 77 | self._std_weight_position * mean[3], 78 | self._std_weight_position * mean[3], 79 | 1e-2, 80 | self._std_weight_position * mean[3]] 81 | std_vel = [ 82 | self._std_weight_velocity * mean[3], 83 | self._std_weight_velocity * mean[3], 84 | 1e-5, 85 | self._std_weight_velocity * mean[3]] 86 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 87 | 88 | mean = np.dot(self._motion_mat, mean) 89 | covariance = np.linalg.multi_dot(( 90 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 91 | 92 | return mean, covariance 93 | 94 | def project(self, mean, covariance): 95 | """ 96 | 投影状态分布到测量空间 97 | Parameters 98 | ---------- 99 | mean 状态的均值 100 | covariance 状态的协方差 101 | 102 | Returns 103 | 给定状态估计的均值和方差 104 | ------- 105 | 106 | """ 107 | std = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-1, 111 | self._std_weight_position * mean[3]] 112 | innovation_cov = np.diag(np.square(std)) 113 | 114 | mean = np.dot(self._update_mat, mean) 115 | covariance = np.linalg.multi_dot(( 116 | self._update_mat, covariance, self._update_mat.T)) 117 | return mean, covariance + innovation_cov 118 | 119 | def update(self, mean, covariance, measurement): 120 | """ 121 | 状态更新 122 | Parameters 123 | ---------- 124 | mean 125 | covariance 126 | measurement 127 | 128 | Returns 129 | ------- 130 | 131 | """ 132 | projected_mean, projected_cov = self.project(mean, covariance) 133 | 134 | chol_factor, lower = scipy.linalg.cho_factor( 135 | projected_cov, lower=True, check_finite=False) 136 | # 计算卡尔曼增益 137 | kalman_gain = scipy.linalg.cho_solve( 138 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 139 | check_finite=False).T 140 | innovation = measurement - projected_mean 141 | 142 | new_mean = mean + np.dot(innovation, kalman_gain.T) 143 | new_covariance = covariance - np.linalg.multi_dot(( 144 | kalman_gain, projected_cov, kalman_gain.T)) 145 | return new_mean, new_covariance 146 | 147 | def gating_distance(self, mean, covariance, measurements, 148 | only_position=False): 149 | 150 | 151 | mean, covariance = self.project(mean, covariance) 152 | if only_position: 153 | mean, covariance = mean[:2], covariance[:2, :2] 154 | measurements = measurements[:, :2] 155 | 156 | cholesky_factor = np.linalg.cholesky(covariance) # Cholesky分解 157 | d = measurements - mean 158 | z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) 159 | squared_maha = np.sum(z * z, axis=0) 160 | return squared_maha 161 | 
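# 以下为一个最小的调用示例(仅作演示,观测数值为假设的检测框),
# 展示跟踪中卡尔曼滤波的基本流程:initiate -> predict -> update,
# 观测格式为 (cx, cy, a, h),其中 a 为宽高比。
if __name__ == '__main__':
    kf = KalmanFilter()
    measurement = np.array([320., 240., 0.5, 160.])       # 假设第一帧的检测框
    mean, covariance = kf.initiate(measurement)            # 由检测初始化8维状态均值与协方差
    mean, covariance = kf.predict(mean, covariance)        # 预测下一帧的状态分布
    new_measurement = np.array([324., 242., 0.5, 162.])    # 假设下一帧匹配到的检测框
    mean, covariance = kf.update(mean, covariance, new_measurement)  # 用观测校正预测
    print(mean[:4])                                        # 校正后的 (cx, cy, a, h)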
-------------------------------------------------------------------------------- /deepsort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.optimize import linear_sum_assignment as linear_assignment 3 | from . import kalman_filter 4 | 5 | INFTY_COST = 1e+5 6 | 7 | 8 | def min_cost_matching(distance_metric, max_distance, tracks, detections, track_indices=None, detection_indices=None): 9 | """ 10 | 使用匈牙利算法解决线性分配问题 11 | Parameters 12 | ---------- 13 | distance_metric 轨迹集检测和他们的下标 14 | max_distance 最大距离阈值,大于此距离的关联无效 15 | tracks 16 | detections 17 | track_indices 18 | detection_indices 19 | 20 | Returns 21 | 匹配上的轨迹和检测 22 | 未匹配的轨迹 23 | 未匹配的检测 24 | ------- 25 | 26 | """ 27 | if track_indices is None: 28 | track_indices = np.arange(len(tracks)) 29 | if detection_indices is None: 30 | detection_indices = np.arange(len(detections)) 31 | 32 | if len(detection_indices) == 0 or len(track_indices) == 0: 33 | return [], track_indices, detection_indices # Nothing to match. 34 | 35 | cost_matrix = distance_metric( 36 | tracks, detections, track_indices, detection_indices) 37 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 38 | 39 | row_indices, col_indices = linear_assignment(cost_matrix) 40 | 41 | matches, unmatched_tracks, unmatched_detections = [], [], [] 42 | for col, detection_idx in enumerate(detection_indices): 43 | if col not in col_indices: 44 | unmatched_detections.append(detection_idx) 45 | for row, track_idx in enumerate(track_indices): 46 | if row not in row_indices: 47 | unmatched_tracks.append(track_idx) 48 | for row, col in zip(row_indices, col_indices): 49 | track_idx = track_indices[row] 50 | detection_idx = detection_indices[col] 51 | if cost_matrix[row, col] > max_distance: 52 | # 如果组合后的cost大于阈值还是认为不匹配,移到不匹配列表中 53 | unmatched_tracks.append(track_idx) 54 | unmatched_detections.append(detection_idx) 55 | else: 56 | matches.append((track_idx, detection_idx)) 57 | return matches, unmatched_tracks, unmatched_detections 58 | 59 | 60 | def matching_cascade(distance_metric, max_distance, cascade_depth, tracks, detections, track_indices=None, 61 | detection_indices=None): 62 | """ 63 | 级联匹配 64 | 参数和返回类似上面 65 | Parameters 66 | ---------- 67 | distance_metric 68 | max_distance 69 | cascade_depth 70 | tracks 71 | detections 72 | track_indices 73 | detection_indices 74 | 75 | Returns 76 | ------- 77 | 78 | """ 79 | 80 | if track_indices is None: 81 | track_indices = list(range(len(tracks))) 82 | if detection_indices is None: 83 | detection_indices = list(range(len(detections))) 84 | 85 | unmatched_detections = detection_indices 86 | matches = [] 87 | for level in range(cascade_depth): 88 | if len(unmatched_detections) == 0: # No detections left 89 | break 90 | 91 | track_indices_l = [ 92 | k for k in track_indices 93 | if tracks[k].time_since_update == 1 + level 94 | ] 95 | if len(track_indices_l) == 0: # Nothing to match at this level 96 | continue 97 | 98 | matches_l, _, unmatched_detections = min_cost_matching(distance_metric, max_distance, tracks, detections, 99 | track_indices_l, unmatched_detections) 100 | matches += matches_l 101 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 102 | return matches, unmatched_tracks, unmatched_detections 103 | 104 | 105 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, track_indices, detection_indices, 106 | gated_cost=INFTY_COST, only_position=False): 107 | """ 108 | 使用马氏距离进一步筛选代价矩阵 109 | 
门控矩阵的作用就是通过计算卡尔曼滤波的状态分布和测量值之间的距离对代价矩阵进行限制。 110 | 代价矩阵中的距离是Track和Detection之间的表观相似度,假如一个轨迹要去匹配两个表观特征非常相似的Detection,这样就很容易出错, 111 | 但是这个时候分别让两个Detection计算与这个轨迹的马氏距离, 112 | 并使用一个阈值gating_threshold进行限制,所以就可以将马氏距离较远的那个Detection区分开,可以降低错误的匹配。 113 | """ 114 | gating_dim = 2 if only_position else 4 115 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 116 | measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) 117 | for row, track_idx in enumerate(track_indices): 118 | track = tracks[track_idx] 119 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position) 120 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 121 | return cost_matrix 122 | -------------------------------------------------------------------------------- /deepsort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def _pdist(a, b): 5 | """ 6 | 计算欧式距离 7 | Parameters 8 | """ 9 | a, b = np.asarray(a), np.asarray(b) 10 | if len(a) == 0 or len(b) == 0: 11 | return np.zeros((len(a), len(b))) 12 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 13 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 14 | r2 = np.clip(r2, 0., float(np.inf)) 15 | return r2 16 | 17 | 18 | def _cosine_distance(a, b, data_is_normalized=False): 19 | """ 20 | 计算余弦距离 21 | """ 22 | if not data_is_normalized: 23 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 24 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 25 | return 1. - np.dot(a, b.T) 26 | 27 | 28 | def _nn_euclidean_distance(x, y): 29 | distances = _pdist(x, y) 30 | return np.maximum(0.0, distances.min(axis=0)) 31 | 32 | 33 | def _nn_cosine_distance(x, y): 34 | distances = _cosine_distance(x, y) 35 | return distances.min(axis=0) 36 | 37 | 38 | class NearestNeighborDistanceMetric(object): 39 | 40 | def __init__(self, metric, matching_threshold, budget=None): 41 | """ 42 | metric "euclidean" or "cosine" 43 | matching_threshold 匹配阈值,大于此认为无效匹配 44 | budget 如果不是None,则将每个类的样本最多固定为这个数字。当达到budget大小时,删除最老的样本。 45 | """ 46 | 47 | if metric == "euclidean": 48 | self._metric = _nn_euclidean_distance 49 | elif metric == "cosine": 50 | self._metric = _nn_cosine_distance 51 | else: 52 | raise ValueError("Invalid metric; must be either 'euclidean' or 'cosine'") 53 | self.matching_threshold = matching_threshold 54 | self.budget = budget 55 | self.samples = {} 56 | 57 | def partial_fit(self, features, targets, active_targets): 58 | """ 59 | 使用新数据更新距离指标 60 | ---------- 61 | features M维的N个特征 62 | targets 关联目标Id的数组 63 | active_targets 场景中当前存在的目标列表 64 | """ 65 | for feature, target in zip(features, targets): 66 | self.samples.setdefault(target, []).append(feature) 67 | if self.budget is not None: 68 | self.samples[target] = self.samples[target][-self.budget:] 69 | self.samples = {k: self.samples[k] for k in active_targets} 70 | 71 | def distance(self, features, targets): 72 | """ 73 | 比较feature和targets之间的距离,返回一个代价矩阵 74 | 在匹配阶段,将distance封装为gated_metric,进行外观信息(reid得到的深度特征)+运动信息(马氏距离用于度量两个分布相似程度) 75 | 76 | """ 77 | cost_matrix = np.zeros((len(targets), len(features))) 78 | for i, target in enumerate(targets): 79 | cost_matrix[i, :] = self._metric(self.samples[target], features) 80 | return cost_matrix 81 | -------------------------------------------------------------------------------- /deepsort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 
2 | 3 | 4 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 5 | """ 6 | nms算法的Python实现 7 | :param boxes: ROI矩阵,格式为(x, y, w, h) 8 | :param max_bbox_overlap:覆盖高于该值被抑制 9 | :param scores:检测器置信度 10 | :return: 11 | Examples 12 | -------- 13 | 14 | >>> boxes = [d.roi for d in detections] 15 | >>> scores = [d.confidence for d in detections] 16 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 17 | >>> detections = [detections[i] for i in indices] 18 | """ 19 | if len(boxes) == 0: 20 | return [] 21 | 22 | boxes = boxes.astype(np.float) 23 | pick = [] 24 | 25 | x1 = boxes[:, 0] 26 | y1 = boxes[:, 1] 27 | x2 = boxes[:, 2] + boxes[:, 0] 28 | y2 = boxes[:, 3] + boxes[:, 1] 29 | 30 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 31 | if scores is not None: 32 | idxs = np.argsort(scores) 33 | else: 34 | idxs = np.argsort(y2) 35 | 36 | while len(idxs) > 0: 37 | last = len(idxs) - 1 38 | i = idxs[last] 39 | pick.append(i) 40 | 41 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 42 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 43 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 44 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 45 | 46 | w = np.maximum(0, xx2 - xx1 + 1) 47 | h = np.maximum(0, yy2 - yy1 + 1) 48 | 49 | overlap = (w * h) / area[idxs[:last]] 50 | 51 | idxs = np.delete( 52 | idxs, np.concatenate( 53 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 54 | 55 | return pick 56 | -------------------------------------------------------------------------------- /deepsort/sort/track.py: -------------------------------------------------------------------------------- 1 | class TrackState: 2 | """ 3 | 轨迹状态 4 | """ 5 | Tentative = 1 6 | Confirmed = 2 7 | Deleted = 3 8 | 9 | 10 | class Track: 11 | """ 12 | 包含一个轨迹的所有信息 13 | """ 14 | 15 | def __init__(self, mean, covariance, track_id, n_init, max_age, 16 | feature=None): 17 | self.mean = mean 18 | self.covariance = covariance 19 | self.track_id = track_id 20 | self.hits = 1 # 命中次数 21 | self.time_since_update = 0 22 | 23 | self.state = TrackState.Tentative # 创建时的状态为Tentative 24 | self.features = [] 25 | if feature is not None: 26 | self.features.append(feature) 27 | 28 | self._n_init = n_init 29 | self._max_age = max_age 30 | 31 | def to_tlwh(self): 32 | """ 33 | 当前目标位置,格式转换 34 | Returns 35 | ------- 36 | 37 | """ 38 | ret = self.mean[:4].copy() 39 | ret[2] *= ret[3] 40 | ret[:2] -= ret[2:] / 2 41 | return ret 42 | 43 | def to_tlbr(self): 44 | ret = self.to_tlwh() 45 | ret[2:] = ret[:2] + ret[2:] 46 | return ret 47 | 48 | def predict(self, kf): 49 | """ 50 | 使用卡尔曼滤波进行状态预测 51 | """ 52 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 53 | self.time_since_update += 1 # 每次预测自增1 54 | 55 | def update(self, kf, detection): 56 | """ 57 | 进行相关矩阵和数据的更新 58 | """ 59 | self.mean, self.covariance = kf.update( 60 | self.mean, self.covariance, detection.to_xyah()) 61 | self.features.append(detection.feature) 62 | 63 | self.hits += 1 64 | self.time_since_update = 0 65 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 66 | self.state = TrackState.Confirmed 67 | 68 | def mark_missed(self): 69 | if self.state == TrackState.Tentative: 70 | self.state = TrackState.Deleted 71 | elif self.time_since_update > self._max_age: 72 | self.state = TrackState.Deleted 73 | 74 | def is_tentative(self): 75 | """ 76 | 该轨迹是否为tentative(临时存在) 77 | """ 78 | return self.state == TrackState.Tentative 79 | 80 | def is_confirmed(self): 81 | """ 82 | 该轨迹是否确认 83 | """ 84 | return self.state == TrackState.Confirmed 85 | 86 | def is_deleted(self): 87 | """ 
88 | 该轨迹是否删除 89 | """ 90 | return self.state == TrackState.Deleted 91 | -------------------------------------------------------------------------------- /deepsort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from . import kalman_filter 3 | from . import linear_assignment 4 | from . import iou_matching 5 | from .track import Track 6 | 7 | 8 | class Tracker: 9 | """ 10 | 多目标跟踪器实现 11 | """ 12 | 13 | def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): 14 | self.metric = metric 15 | self.max_iou_distance = max_iou_distance 16 | self.max_age = max_age 17 | self.n_init = n_init 18 | 19 | self.kf = kalman_filter.KalmanFilter() 20 | self.tracks = [] 21 | self._next_id = 1 22 | 23 | def predict(self): 24 | """ 25 | 状态预测 26 | """ 27 | for track in self.tracks: 28 | track.predict(self.kf) 29 | 30 | def update(self, detections): 31 | """ 32 | 状态更新 33 | """ 34 | # 级联匹配 35 | matches, unmatched_tracks, unmatched_detections = self._match(detections) 36 | 37 | for track_idx, detection_idx in matches: 38 | # 成功匹配的要用检测结果更新对于track的参数 39 | # 包括 40 | # 更新卡尔曼滤波一系列运动变量、命中次数以及重置time_since_update 41 | # 检测的深度特征保存到track的特征集中 42 | # 连续命中三帧,将track状态由tentative改为confirmed 43 | 44 | self.tracks[track_idx].update( 45 | self.kf, detections[detection_idx]) 46 | for track_idx in unmatched_tracks: 47 | # 未成功匹配的track 48 | # 若未经过confirm则删除 49 | # 若已经confirm但连续max_age帧未匹配到检测结果也删除 50 | self.tracks[track_idx].mark_missed() 51 | for detection_idx in unmatched_detections: 52 | # 未匹配的检测,为其创建新的track 53 | self._initiate_track(detections[detection_idx]) 54 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 55 | 56 | # Update distance metric. 57 | # 更新已经确认的track的特征集 58 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 59 | features, targets = [], [] 60 | for track in self.tracks: 61 | if not track.is_confirmed(): 62 | continue 63 | features += track.features 64 | targets += [track.track_id for _ in track.features] 65 | track.features = [] 66 | self.metric.partial_fit( 67 | np.asarray(features), np.asarray(targets), active_targets) 68 | 69 | def _match(self, detections): 70 | """ 71 | 跟踪结果和检测结果的匹配 72 | :param detections: 73 | :return: 74 | """ 75 | 76 | def gated_metric(tracks, dets, track_indices, detection_indices): 77 | features = np.array([dets[i].feature for i in detection_indices]) 78 | targets = np.array([tracks[i].track_id for i in track_indices]) 79 | # 通过最近邻计算余弦距离代价矩阵 80 | cost_matrix = self.metric.distance(features, targets) 81 | # 计算马氏距离,得到新的代价矩阵 82 | cost_matrix = linear_assignment.gate_cost_matrix( 83 | self.kf, cost_matrix, tracks, dets, track_indices, 84 | detection_indices) 85 | 86 | return cost_matrix 87 | 88 | # 将track分为确认track和未确认track 89 | confirmed_tracks = [ 90 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 91 | unconfirmed_tracks = [ 92 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 93 | 94 | # 将确认的track和检测结果进行级联匹配(使用外观特征) 95 | matches_a, unmatched_tracks_a, unmatched_detections = linear_assignment.matching_cascade( 96 | gated_metric, self.metric.matching_threshold, self.max_age, 97 | self.tracks, detections, confirmed_tracks) 98 | 99 | # 将上一步未成功匹配的track和未确认的track组合到一起形成iou_track_candidates于还没有匹配结果的检测结果进行IOU匹配 100 | iou_track_candidates = unconfirmed_tracks + [ 101 | k for k in unmatched_tracks_a if 102 | self.tracks[k].time_since_update == 1] 103 | unmatched_tracks_a = [ 104 | k for k in unmatched_tracks_a if 105 | self.tracks[k].time_since_update != 1] 
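        # 只有上一帧刚更新过(time_since_update == 1)的轨迹,其预测框才足够可靠,适合参与IOU匹配;间隔更久的轨迹直接留作未匹配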
106 | # 计算两两之间的iou,再通过1-iou得到cost matrix 107 | matches_b, unmatched_tracks_b, unmatched_detections = linear_assignment.min_cost_matching( 108 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 109 | detections, iou_track_candidates, unmatched_detections) 110 | 111 | matches = matches_a + matches_b # 组合获得当前所有匹配结果 112 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 113 | return matches, unmatched_tracks, unmatched_detections 114 | 115 | def _initiate_track(self, detection): 116 | """ 117 | 初始化新的跟踪器,对应新的检测结果 118 | :param detection: 119 | :return: 120 | """ 121 | # 初始化卡尔曼 122 | mean, covariance = self.kf.initiate(detection.to_xyah()) 123 | # 创建新的跟踪器 124 | self.tracks.append(Track( 125 | mean, covariance, self._next_id, self.n_init, self.max_age, 126 | detection.feature)) 127 | # id自增 128 | self._next_id += 1 129 | -------------------------------------------------------------------------------- /detector/FasterRCNN/tips.txt: -------------------------------------------------------------------------------- 1 | gitignore this folder -------------------------------------------------------------------------------- /detector/YOLO3/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("detector/YOLO3") 3 | 4 | 5 | from .detector import YOLOv3 6 | __all__ = ['YOLOv3'] 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .yolo_utils import convert2cpu 3 | import os 4 | 5 | 6 | def parse_cfg(cfgfile): 7 | blocks = [] 8 | fp = open(cfgfile, 'r') 9 | block = None 10 | line = fp.readline() 11 | while line != '': 12 | line = line.rstrip() 13 | if line == '' or line[0] == '#': 14 | line = fp.readline() 15 | continue 16 | elif line[0] == '[': 17 | if block: 18 | blocks.append(block) 19 | block = dict() 20 | block['type'] = line.lstrip('[').rstrip(']') 21 | # set default value 22 | if block['type'] == 'convolutional': 23 | block['batch_normalize'] = 0 24 | else: 25 | key, value = line.split('=') 26 | key = key.strip() 27 | if key == 'type': 28 | key = '_type' 29 | value = value.strip() 30 | block[key] = value 31 | line = fp.readline() 32 | 33 | if block: 34 | blocks.append(block) 35 | fp.close() 36 | return blocks 37 | 38 | 39 | def print_cfg(blocks): 40 | print('layer filters size input output'); 41 | prev_width = 416 42 | prev_height = 416 43 | prev_filters = 3 44 | out_filters = [] 45 | out_widths = [] 46 | out_heights = [] 47 | ind = -2 48 | for block in blocks: 49 | ind = ind + 1 50 | if block['type'] == 'net': 51 | prev_width = int(block['width']) 52 | prev_height = int(block['height']) 53 | continue 54 | elif block['type'] == 'convolutional': 55 | filters = int(block['filters']) 56 | kernel_size = int(block['size']) 57 | stride = int(block['stride']) 58 | is_pad = int(block['pad']) 59 | pad = (kernel_size - 1) // 2 if is_pad else 0 60 | width = (prev_width + 2 * pad - kernel_size) // stride + 1 61 | height = (prev_height + 2 * pad - kernel_size) // stride + 1 62 | print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 63 | ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, 64 | height, filters)) 65 | prev_width = width 66 | prev_height = height 67 | prev_filters = filters 68 | out_widths.append(prev_width) 69 | out_heights.append(prev_height) 70 | 
out_filters.append(prev_filters) 71 | elif block['type'] == 'maxpool': 72 | pool_size = int(block['size']) 73 | stride = int(block['stride']) 74 | width = prev_width // stride 75 | height = prev_height // stride 76 | print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 77 | ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, filters)) 78 | prev_width = width 79 | prev_height = height 80 | prev_filters = filters 81 | out_widths.append(prev_width) 82 | out_heights.append(prev_height) 83 | out_filters.append(prev_filters) 84 | elif block['type'] == 'avgpool': 85 | width = 1 86 | height = 1 87 | print('%5d %-6s %3d x %3d x%4d -> %3d' % ( 88 | ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) 89 | prev_width = width 90 | prev_height = height 91 | prev_filters = filters 92 | out_widths.append(prev_width) 93 | out_heights.append(prev_height) 94 | out_filters.append(prev_filters) 95 | elif block['type'] == 'softmax': 96 | print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) 97 | out_widths.append(prev_width) 98 | out_heights.append(prev_height) 99 | out_filters.append(prev_filters) 100 | elif block['type'] == 'cost': 101 | print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) 102 | out_widths.append(prev_width) 103 | out_heights.append(prev_height) 104 | out_filters.append(prev_filters) 105 | elif block['type'] == 'reorg': 106 | stride = int(block['stride']) 107 | filters = stride * stride * prev_filters 108 | width = prev_width // stride 109 | height = prev_height // stride 110 | print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 111 | ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) 112 | prev_width = width 113 | prev_height = height 114 | prev_filters = filters 115 | out_widths.append(prev_width) 116 | out_heights.append(prev_height) 117 | out_filters.append(prev_filters) 118 | elif block['type'] == 'upsample': 119 | stride = int(block['stride']) 120 | filters = prev_filters 121 | width = prev_width * stride 122 | height = prev_height * stride 123 | print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 124 | ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) 125 | prev_width = width 126 | prev_height = height 127 | prev_filters = filters 128 | out_widths.append(prev_width) 129 | out_heights.append(prev_height) 130 | out_filters.append(prev_filters) 131 | elif block['type'] == 'route': 132 | layers = block['layers'].split(',') 133 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 134 | if len(layers) == 1: 135 | print('%5d %-6s %d' % (ind, 'route', layers[0])) 136 | prev_width = out_widths[layers[0]] 137 | prev_height = out_heights[layers[0]] 138 | prev_filters = out_filters[layers[0]] 139 | elif len(layers) == 2: 140 | print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) 141 | prev_width = out_widths[layers[0]] 142 | prev_height = out_heights[layers[0]] 143 | assert (prev_width == out_widths[layers[1]]) 144 | assert (prev_height == out_heights[layers[1]]) 145 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 146 | out_widths.append(prev_width) 147 | out_heights.append(prev_height) 148 | out_filters.append(prev_filters) 149 | elif block['type'] in ['region', 'yolo']: 150 | print('%5d %-6s' % (ind, 'detection')) 151 | out_widths.append(prev_width) 152 | out_heights.append(prev_height) 153 | out_filters.append(prev_filters) 154 | elif block['type'] == 'shortcut': 155 | from_id = 
int(block['from']) 156 | from_id = from_id if from_id > 0 else from_id + ind 157 | print('%5d %-6s %d' % (ind, 'shortcut', from_id)) 158 | prev_width = out_widths[from_id] 159 | prev_height = out_heights[from_id] 160 | prev_filters = out_filters[from_id] 161 | out_widths.append(prev_width) 162 | out_heights.append(prev_height) 163 | out_filters.append(prev_filters) 164 | elif block['type'] == 'connected': 165 | filters = int(block['output']) 166 | print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) 167 | prev_filters = filters 168 | out_widths.append(1) 169 | out_heights.append(1) 170 | out_filters.append(prev_filters) 171 | else: 172 | print('unknown type %s' % (block['type'])) 173 | 174 | 175 | def load_conv(buf, start, conv_model): 176 | num_w = conv_model.weight.numel() 177 | num_b = conv_model.bias.numel() 178 | # print("start: {}, num_w: {}, num_b: {}".format(start, num_w, num_b)) 179 | # by ysyun, use .view_as() 180 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]).view_as(conv_model.bias.data)); 181 | start = start + num_b 182 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 183 | start = start + num_w 184 | return start 185 | 186 | 187 | def save_conv(fp, conv_model): 188 | if conv_model.bias.is_cuda: 189 | convert2cpu(conv_model.bias.data).numpy().tofile(fp) 190 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 191 | else: 192 | conv_model.bias.data.numpy().tofile(fp) 193 | conv_model.weight.data.numpy().tofile(fp) 194 | 195 | 196 | def load_conv_bn(buf, start, conv_model, bn_model): 197 | num_w = conv_model.weight.numel() 198 | num_b = bn_model.bias.numel() 199 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 200 | start = start + num_b 201 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])); 202 | start = start + num_b 203 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])); 204 | start = start + num_b 205 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])); 206 | start = start + num_b 207 | # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w 208 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 209 | start = start + num_w 210 | return start 211 | 212 | 213 | def save_conv_bn(fp, conv_model, bn_model): 214 | if bn_model.bias.is_cuda: 215 | convert2cpu(bn_model.bias.data).numpy().tofile(fp) 216 | convert2cpu(bn_model.weight.data).numpy().tofile(fp) 217 | convert2cpu(bn_model.running_mean).numpy().tofile(fp) 218 | convert2cpu(bn_model.running_var).numpy().tofile(fp) 219 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 220 | else: 221 | bn_model.bias.data.numpy().tofile(fp) 222 | bn_model.weight.data.numpy().tofile(fp) 223 | bn_model.running_mean.numpy().tofile(fp) 224 | bn_model.running_var.numpy().tofile(fp) 225 | conv_model.weight.data.numpy().tofile(fp) 226 | 227 | 228 | def load_fc(buf, start, fc_model): 229 | num_w = fc_model.weight.numel() 230 | num_b = fc_model.bias.numel() 231 | fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 232 | start = start + num_b 233 | fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])); 234 | start = start + num_w 235 | return start 236 | 237 | 238 | def save_fc(fp, fc_model): 239 | fc_model.bias.data.numpy().tofile(fp) 240 | fc_model.weight.data.numpy().tofile(fp) 241 | 242 | 243 | if __name__ == 
'__main__': 244 | import sys 245 | 246 | blocks = parse_cfg('cfg/yolo.cfg') 247 | if len(sys.argv) == 2: 248 | blocks = parse_cfg(sys.argv[1]) 249 | print_cfg(blocks) 250 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/coco.data: -------------------------------------------------------------------------------- 1 | train = coco_train.txt 2 | valid = coco_test.txt 3 | names = data/coco.names 4 | backup = backup 5 | gpus = 0,1,2,3 6 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/darknet19_448.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=448 5 | width=448 6 | max_crop=512 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.001 12 | policy=poly 13 | power=4 14 | max_batches=100000 15 | 16 | angle=7 17 | hue = .1 18 | saturation=.75 19 | exposure=.75 20 | aspect=.75 21 | 22 | [convolutional] 23 | batch_normalize=1 24 | filters=32 25 | size=3 26 | stride=1 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=64 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | batch_normalize=1 48 | filters=128 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=1 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | 
[convolutional] 111 | batch_normalize=1 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=512 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [convolutional] 127 | batch_normalize=1 128 | filters=256 129 | size=1 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | batch_normalize=1 136 | filters=512 137 | size=3 138 | stride=1 139 | pad=1 140 | activation=leaky 141 | 142 | [maxpool] 143 | size=2 144 | stride=2 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=1024 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=512 157 | size=1 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [convolutional] 163 | batch_normalize=1 164 | filters=1024 165 | size=3 166 | stride=1 167 | pad=1 168 | activation=leaky 169 | 170 | [convolutional] 171 | batch_normalize=1 172 | filters=512 173 | size=1 174 | stride=1 175 | pad=1 176 | activation=leaky 177 | 178 | [convolutional] 179 | batch_normalize=1 180 | filters=1024 181 | size=3 182 | stride=1 183 | pad=1 184 | activation=leaky 185 | 186 | [convolutional] 187 | filters=1000 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=linear 192 | 193 | [avgpool] 194 | 195 | [softmax] 196 | groups=1 197 | 198 | [cost] 199 | type=sse 200 | 201 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 | pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | 
softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Training 3 | # batch=64 4 | # subdivisions=2 5 | # Testing 6 | batch=1 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | 141 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc.data: -------------------------------------------------------------------------------- 1 | train = data/voc_train.txt 2 | valid = data/2007_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | gpus = 3 6 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/voc_gaotie.data: -------------------------------------------------------------------------------- 1 | train = 
data/gaotie_trainval.txt 2 | valid = data/gaotie_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | gpus = 3 -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 
206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | 
batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolo_v3.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=16 7 | subdivisions=4 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=20,25 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | # Downsample 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=32 46 | size=1 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [shortcut] 60 | from=-3 61 | activation=linear 62 | 63 | # Downsample 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=2 70 | pad=1 71 | activation=leaky 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=64 76 | size=1 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=3 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | [shortcut] 90 | from=-3 91 | activation=linear 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=64 96 | size=1 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=128 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | 
[shortcut] 110 | from=-3 111 | activation=linear 112 | 113 | # Downsample 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=256 118 | size=3 119 | stride=2 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | batch_normalize=1 125 | filters=128 126 | size=1 127 | stride=1 128 | pad=1 129 | activation=leaky 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [shortcut] 140 | from=-3 141 | activation=linear 142 | 143 | [convolutional] 144 | batch_normalize=1 145 | filters=128 146 | size=1 147 | stride=1 148 | pad=1 149 | activation=leaky 150 | 151 | [convolutional] 152 | batch_normalize=1 153 | filters=256 154 | size=3 155 | stride=1 156 | pad=1 157 | activation=leaky 158 | 159 | [shortcut] 160 | from=-3 161 | activation=linear 162 | 163 | [convolutional] 164 | batch_normalize=1 165 | filters=128 166 | size=1 167 | stride=1 168 | pad=1 169 | activation=leaky 170 | 171 | [convolutional] 172 | batch_normalize=1 173 | filters=256 174 | size=3 175 | stride=1 176 | pad=1 177 | activation=leaky 178 | 179 | [shortcut] 180 | from=-3 181 | activation=linear 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=128 186 | size=1 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | [convolutional] 192 | batch_normalize=1 193 | filters=256 194 | size=3 195 | stride=1 196 | pad=1 197 | activation=leaky 198 | 199 | [shortcut] 200 | from=-3 201 | activation=linear 202 | 203 | 204 | [convolutional] 205 | batch_normalize=1 206 | filters=128 207 | size=1 208 | stride=1 209 | pad=1 210 | activation=leaky 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=3 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [shortcut] 221 | from=-3 222 | activation=linear 223 | 224 | [convolutional] 225 | batch_normalize=1 226 | filters=128 227 | size=1 228 | stride=1 229 | pad=1 230 | activation=leaky 231 | 232 | [convolutional] 233 | batch_normalize=1 234 | filters=256 235 | size=3 236 | stride=1 237 | pad=1 238 | activation=leaky 239 | 240 | [shortcut] 241 | from=-3 242 | activation=linear 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=128 247 | size=1 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | batch_normalize=1 254 | filters=256 255 | size=3 256 | stride=1 257 | pad=1 258 | activation=leaky 259 | 260 | [shortcut] 261 | from=-3 262 | activation=linear 263 | 264 | [convolutional] 265 | batch_normalize=1 266 | filters=128 267 | size=1 268 | stride=1 269 | pad=1 270 | activation=leaky 271 | 272 | [convolutional] 273 | batch_normalize=1 274 | filters=256 275 | size=3 276 | stride=1 277 | pad=1 278 | activation=leaky 279 | 280 | [shortcut] 281 | from=-3 282 | activation=linear 283 | 284 | # Downsample 285 | 286 | [convolutional] 287 | batch_normalize=1 288 | filters=512 289 | size=3 290 | stride=2 291 | pad=1 292 | activation=leaky 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=1 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | batch_normalize=1 304 | filters=512 305 | size=3 306 | stride=1 307 | pad=1 308 | activation=leaky 309 | 310 | [shortcut] 311 | from=-3 312 | activation=linear 313 | 314 | 315 | [convolutional] 316 | batch_normalize=1 317 | filters=256 318 | size=1 319 | stride=1 320 | pad=1 321 | activation=leaky 322 | 323 | [convolutional] 324 | batch_normalize=1 325 | filters=512 326 | size=3 327 | stride=1 328 
| pad=1 329 | activation=leaky 330 | 331 | [shortcut] 332 | from=-3 333 | activation=linear 334 | 335 | 336 | [convolutional] 337 | batch_normalize=1 338 | filters=256 339 | size=1 340 | stride=1 341 | pad=1 342 | activation=leaky 343 | 344 | [convolutional] 345 | batch_normalize=1 346 | filters=512 347 | size=3 348 | stride=1 349 | pad=1 350 | activation=leaky 351 | 352 | [shortcut] 353 | from=-3 354 | activation=linear 355 | 356 | 357 | [convolutional] 358 | batch_normalize=1 359 | filters=256 360 | size=1 361 | stride=1 362 | pad=1 363 | activation=leaky 364 | 365 | [convolutional] 366 | batch_normalize=1 367 | filters=512 368 | size=3 369 | stride=1 370 | pad=1 371 | activation=leaky 372 | 373 | [shortcut] 374 | from=-3 375 | activation=linear 376 | 377 | [convolutional] 378 | batch_normalize=1 379 | filters=256 380 | size=1 381 | stride=1 382 | pad=1 383 | activation=leaky 384 | 385 | [convolutional] 386 | batch_normalize=1 387 | filters=512 388 | size=3 389 | stride=1 390 | pad=1 391 | activation=leaky 392 | 393 | [shortcut] 394 | from=-3 395 | activation=linear 396 | 397 | 398 | [convolutional] 399 | batch_normalize=1 400 | filters=256 401 | size=1 402 | stride=1 403 | pad=1 404 | activation=leaky 405 | 406 | [convolutional] 407 | batch_normalize=1 408 | filters=512 409 | size=3 410 | stride=1 411 | pad=1 412 | activation=leaky 413 | 414 | [shortcut] 415 | from=-3 416 | activation=linear 417 | 418 | 419 | [convolutional] 420 | batch_normalize=1 421 | filters=256 422 | size=1 423 | stride=1 424 | pad=1 425 | activation=leaky 426 | 427 | [convolutional] 428 | batch_normalize=1 429 | filters=512 430 | size=3 431 | stride=1 432 | pad=1 433 | activation=leaky 434 | 435 | [shortcut] 436 | from=-3 437 | activation=linear 438 | 439 | [convolutional] 440 | batch_normalize=1 441 | filters=256 442 | size=1 443 | stride=1 444 | pad=1 445 | activation=leaky 446 | 447 | [convolutional] 448 | batch_normalize=1 449 | filters=512 450 | size=3 451 | stride=1 452 | pad=1 453 | activation=leaky 454 | 455 | [shortcut] 456 | from=-3 457 | activation=linear 458 | 459 | # Downsample 460 | 461 | [convolutional] 462 | batch_normalize=1 463 | filters=1024 464 | size=3 465 | stride=2 466 | pad=1 467 | activation=leaky 468 | 469 | [convolutional] 470 | batch_normalize=1 471 | filters=512 472 | size=1 473 | stride=1 474 | pad=1 475 | activation=leaky 476 | 477 | [convolutional] 478 | batch_normalize=1 479 | filters=1024 480 | size=3 481 | stride=1 482 | pad=1 483 | activation=leaky 484 | 485 | [shortcut] 486 | from=-3 487 | activation=linear 488 | 489 | [convolutional] 490 | batch_normalize=1 491 | filters=512 492 | size=1 493 | stride=1 494 | pad=1 495 | activation=leaky 496 | 497 | [convolutional] 498 | batch_normalize=1 499 | filters=1024 500 | size=3 501 | stride=1 502 | pad=1 503 | activation=leaky 504 | 505 | [shortcut] 506 | from=-3 507 | activation=linear 508 | 509 | [convolutional] 510 | batch_normalize=1 511 | filters=512 512 | size=1 513 | stride=1 514 | pad=1 515 | activation=leaky 516 | 517 | [convolutional] 518 | batch_normalize=1 519 | filters=1024 520 | size=3 521 | stride=1 522 | pad=1 523 | activation=leaky 524 | 525 | [shortcut] 526 | from=-3 527 | activation=linear 528 | 529 | [convolutional] 530 | batch_normalize=1 531 | filters=512 532 | size=1 533 | stride=1 534 | pad=1 535 | activation=leaky 536 | 537 | [convolutional] 538 | batch_normalize=1 539 | filters=1024 540 | size=3 541 | stride=1 542 | pad=1 543 | activation=leaky 544 | 545 | [shortcut] 546 | from=-3 547 | activation=linear 548 | 
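# End of the Darknet-53-style feature-extraction backbone (the convolutional/shortcut blocks above);
# the detection head and its three [yolo] output branches follow below.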
549 | ###################### 550 | 551 | [convolutional] 552 | batch_normalize=1 553 | filters=512 554 | size=1 555 | stride=1 556 | pad=1 557 | activation=leaky 558 | 559 | [convolutional] 560 | batch_normalize=1 561 | size=3 562 | stride=1 563 | pad=1 564 | filters=1024 565 | activation=leaky 566 | 567 | [convolutional] 568 | batch_normalize=1 569 | filters=512 570 | size=1 571 | stride=1 572 | pad=1 573 | activation=leaky 574 | 575 | [convolutional] 576 | batch_normalize=1 577 | size=3 578 | stride=1 579 | pad=1 580 | filters=1024 581 | activation=leaky 582 | 583 | [convolutional] 584 | batch_normalize=1 585 | filters=512 586 | size=1 587 | stride=1 588 | pad=1 589 | activation=leaky 590 | 591 | [convolutional] 592 | batch_normalize=1 593 | size=3 594 | stride=1 595 | pad=1 596 | filters=1024 597 | activation=leaky 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=255 604 | activation=linear 605 | 606 | 607 | [yolo] 608 | mask = 6,7,8 609 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 610 | classes=80 611 | num=9 612 | jitter=.3 613 | ignore_thresh = .5 614 | truth_thresh = 1 615 | random=1 616 | 617 | 618 | [route] 619 | layers = -4 620 | 621 | [convolutional] 622 | batch_normalize=1 623 | filters=256 624 | size=1 625 | stride=1 626 | pad=1 627 | activation=leaky 628 | 629 | [upsample] 630 | stride=2 631 | 632 | [route] 633 | layers = -1, 61 634 | 635 | 636 | 637 | [convolutional] 638 | batch_normalize=1 639 | filters=256 640 | size=1 641 | stride=1 642 | pad=1 643 | activation=leaky 644 | 645 | [convolutional] 646 | batch_normalize=1 647 | size=3 648 | stride=1 649 | pad=1 650 | filters=512 651 | activation=leaky 652 | 653 | [convolutional] 654 | batch_normalize=1 655 | filters=256 656 | size=1 657 | stride=1 658 | pad=1 659 | activation=leaky 660 | 661 | [convolutional] 662 | batch_normalize=1 663 | size=3 664 | stride=1 665 | pad=1 666 | filters=512 667 | activation=leaky 668 | 669 | [convolutional] 670 | batch_normalize=1 671 | filters=256 672 | size=1 673 | stride=1 674 | pad=1 675 | activation=leaky 676 | 677 | [convolutional] 678 | batch_normalize=1 679 | size=3 680 | stride=1 681 | pad=1 682 | filters=512 683 | activation=leaky 684 | 685 | [convolutional] 686 | size=1 687 | stride=1 688 | pad=1 689 | filters=255 690 | activation=linear 691 | 692 | 693 | [yolo] 694 | mask = 3,4,5 695 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 696 | classes=80 697 | num=9 698 | jitter=.3 699 | ignore_thresh = .5 700 | truth_thresh = 1 701 | random=1 702 | 703 | 704 | 705 | [route] 706 | layers = -4 707 | 708 | [convolutional] 709 | batch_normalize=1 710 | filters=128 711 | size=1 712 | stride=1 713 | pad=1 714 | activation=leaky 715 | 716 | [upsample] 717 | stride=2 718 | 719 | [route] 720 | layers = -1, 36 721 | 722 | 723 | 724 | [convolutional] 725 | batch_normalize=1 726 | filters=128 727 | size=1 728 | stride=1 729 | pad=1 730 | activation=leaky 731 | 732 | [convolutional] 733 | batch_normalize=1 734 | size=3 735 | stride=1 736 | pad=1 737 | filters=256 738 | activation=leaky 739 | 740 | [convolutional] 741 | batch_normalize=1 742 | filters=128 743 | size=1 744 | stride=1 745 | pad=1 746 | activation=leaky 747 | 748 | [convolutional] 749 | batch_normalize=1 750 | size=3 751 | stride=1 752 | pad=1 753 | filters=256 754 | activation=leaky 755 | 756 | [convolutional] 757 | batch_normalize=1 758 | filters=128 759 | size=1 760 | stride=1 761 | pad=1 762 | activation=leaky 763 | 764 | 
[convolutional] 765 | batch_normalize=1 766 | size=3 767 | stride=1 768 | pad=1 769 | filters=256 770 | activation=leaky 771 | 772 | [convolutional] 773 | size=1 774 | stride=1 775 | pad=1 776 | filters=255 777 | activation=linear 778 | 779 | 780 | [yolo] 781 | mask = 0,1,2 782 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 783 | classes=80 784 | num=9 785 | jitter=.3 786 | ignore_thresh = .5 787 | truth_thresh = 1 788 | random=1 789 | 790 | -------------------------------------------------------------------------------- /detector/YOLO3/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 
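In each of the detection cfgs above, the 1x1 convolution feeding a [region] or [yolo] block must output exactly anchors_per_scale x (4 box coordinates + 1 objectness score + one score per class) channels, which is where the otherwise odd-looking `filters=255`, `filters=425` and `filters=125` values come from. A minimal sketch of that relationship (the helper name and the assertions are illustrative, not part of this repo):

```python
def detection_filters(anchors_per_scale, num_classes, num_coords=4):
    """Channels required by the conv layer feeding a [region]/[yolo] block:
    per anchor, 4 box coordinates + 1 objectness score + one score per class."""
    return anchors_per_scale * (num_coords + 1 + num_classes)

# yolo_v3.cfg / yolov3-tiny.cfg: 3 anchors per scale, 80 COCO classes
assert detection_filters(3, 80) == 255
# yolo.cfg / tiny-yolo.cfg ([region] head, COCO): 5 anchors, 80 classes
assert detection_filters(5, 80) == 425
# yolo-voc.cfg / tiny-yolo-voc.cfg ([region] head, VOC): 5 anchors, 20 classes
assert detection_filters(5, 20) == 125
```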
-------------------------------------------------------------------------------- /detector/YOLO3/darknet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from .cfg import * 7 | from .region_layer import RegionLayer 8 | from .yolo_layer import YoloLayer 9 | 10 | 11 | # from layers.batchnorm.bn import BN2d 12 | 13 | 14 | class MaxPoolStride1(nn.Module): 15 | def __init__(self): 16 | super(MaxPoolStride1, self).__init__() 17 | 18 | def forward(self, x): 19 | x = F.max_pool2d(F.pad(x, (0, 1, 0, 1), mode='replicate'), 2, stride=1) 20 | return x 21 | 22 | 23 | class Upsample(nn.Module): 24 | def __init__(self, stride=2): 25 | super(Upsample, self).__init__() 26 | self.stride = stride 27 | 28 | def forward(self, x): 29 | stride = self.stride 30 | assert (x.data.dim() == 4) 31 | B = x.data.size(0) 32 | C = x.data.size(1) 33 | H = x.data.size(2) 34 | W = x.data.size(3) 35 | ws = stride 36 | hs = stride 37 | x = x.view(B, C, H, 1, W, 1).expand(B, C, H, hs, W, ws).contiguous().view(B, C, H * hs, W * ws) 38 | return x 39 | 40 | 41 | class Reorg(nn.Module): 42 | def __init__(self, stride=2): 43 | super(Reorg, self).__init__() 44 | self.stride = stride 45 | 46 | def forward(self, x): 47 | stride = self.stride 48 | assert (x.data.dim() == 4) 49 | B = x.data.size(0) 50 | C = x.data.size(1) 51 | H = x.data.size(2) 52 | W = x.data.size(3) 53 | assert (H % stride == 0) 54 | assert (W % stride == 0) 55 | ws = stride 56 | hs = stride 57 | x = x.view(B, C, H // hs, hs, W // ws, ws).transpose(3, 4).contiguous() 58 | x = x.view(B, C, (H // hs) * (W // ws), hs * ws).transpose(2, 3).contiguous() 59 | x = x.view(B, C, hs * ws, H // hs, W // ws).transpose(1, 2).contiguous() 60 | x = x.view(B, hs * ws * C, H // hs, W // ws) 61 | return x 62 | 63 | 64 | class GlobalAvgPool2d(nn.Module): 65 | def __init__(self): 66 | super(GlobalAvgPool2d, self).__init__() 67 | 68 | def forward(self, x): 69 | N = x.data.size(0) 70 | C = x.data.size(1) 71 | H = x.data.size(2) 72 | W = x.data.size(3) 73 | x = F.avg_pool2d(x, (H, W)) 74 | x = x.view(N, C) 75 | return x 76 | 77 | 78 | # for route and shortcut 79 | class EmptyModule(nn.Module): 80 | def __init__(self): 81 | super(EmptyModule, self).__init__() 82 | 83 | def forward(self, x): 84 | return x 85 | 86 | 87 | # support route shortcut and reorg 88 | 89 | class Darknet(nn.Module): 90 | def getLossLayers(self): 91 | loss_layers = [] 92 | for m in self.models: 93 | if isinstance(m, RegionLayer) or isinstance(m, YoloLayer): 94 | loss_layers.append(m) 95 | return loss_layers 96 | 97 | def __init__(self, cfgfile, use_cuda=True): 98 | super(Darknet, self).__init__() 99 | self.use_cuda = use_cuda 100 | self.blocks = parse_cfg(cfgfile) 101 | self.models = self.create_network(self.blocks) # merge conv, bn,leaky 102 | self.loss_layers = self.getLossLayers() 103 | 104 | # self.width = int(self.blocks[0]['width']) 105 | # self.height = int(self.blocks[0]['height']) 106 | 107 | if len(self.loss_layers) > 0: 108 | last = len(self.loss_layers) - 1 109 | self.anchors = self.loss_layers[last].anchors 110 | self.num_anchors = self.loss_layers[last].num_anchors 111 | self.anchor_step = self.loss_layers[last].anchor_step 112 | self.num_classes = self.loss_layers[last].num_classes 113 | 114 | # default format : major=0, minor=1 115 | self.header = torch.IntTensor([0, 1, 0, 0]) 116 | self.seen = 0 117 | 118 | def forward(self, x): 119 | ind = -2 120 | 
self.loss_layers = None 121 | outputs = dict() 122 | out_boxes = dict() 123 | outno = 0 124 | for block in self.blocks: 125 | ind = ind + 1 126 | 127 | if block['type'] == 'net': 128 | continue 129 | elif block['type'] in ['convolutional', 'maxpool', 'reorg', 'upsample', 'avgpool', 'softmax', 'connected']: 130 | x = self.models[ind](x) 131 | outputs[ind] = x 132 | elif block['type'] == 'route': 133 | layers = block['layers'].split(',') 134 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 135 | if len(layers) == 1: 136 | x = outputs[layers[0]] 137 | elif len(layers) == 2: 138 | x1 = outputs[layers[0]] 139 | x2 = outputs[layers[1]] 140 | x = torch.cat((x1, x2), 1) 141 | outputs[ind] = x 142 | elif block['type'] == 'shortcut': 143 | from_layer = int(block['from']) 144 | activation = block['activation'] 145 | from_layer = from_layer if from_layer > 0 else from_layer + ind 146 | x1 = outputs[from_layer] 147 | x2 = outputs[ind - 1] 148 | x = x1 + x2 149 | if activation == 'leaky': 150 | x = F.leaky_relu(x, 0.1, inplace=True) 151 | elif activation == 'relu': 152 | x = F.relu(x, inplace=True) 153 | outputs[ind] = x 154 | elif block['type'] in ['region', 'yolo']: 155 | boxes = self.models[ind].get_mask_boxes(x) 156 | out_boxes[outno] = boxes 157 | outno += 1 158 | outputs[ind] = None 159 | elif block['type'] == 'cost': 160 | continue 161 | else: 162 | print('unknown type %s' % (block['type'])) 163 | return x if outno == 0 else out_boxes 164 | 165 | def print_network(self): 166 | print_cfg(self.blocks) 167 | 168 | def create_network(self, blocks): 169 | models = nn.ModuleList() 170 | 171 | prev_filters = 3 172 | out_filters = [] 173 | prev_stride = 1 174 | out_strides = [] 175 | conv_id = 0 176 | ind = -2 177 | for block in blocks: 178 | ind += 1 179 | if block['type'] == 'net': 180 | prev_filters = int(block['channels']) 181 | self.width = int(block['width']) 182 | self.height = int(block['height']) 183 | continue 184 | elif block['type'] == 'convolutional': 185 | conv_id = conv_id + 1 186 | batch_normalize = int(block['batch_normalize']) 187 | filters = int(block['filters']) 188 | kernel_size = int(block['size']) 189 | stride = int(block['stride']) 190 | is_pad = int(block['pad']) 191 | pad = (kernel_size - 1) // 2 if is_pad else 0 192 | activation = block['activation'] 193 | model = nn.Sequential() 194 | if batch_normalize: 195 | model.add_module('conv{0}'.format(conv_id), 196 | nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False)) 197 | model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters)) 198 | # model.add_module('bn{0}'.format(conv_id), BN2d(filters)) 199 | else: 200 | model.add_module('conv{0}'.format(conv_id), 201 | nn.Conv2d(prev_filters, filters, kernel_size, stride, pad)) 202 | if activation == 'leaky': 203 | model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True)) 204 | elif activation == 'relu': 205 | model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True)) 206 | prev_filters = filters 207 | out_filters.append(prev_filters) 208 | prev_stride = stride * prev_stride 209 | out_strides.append(prev_stride) 210 | models.append(model) 211 | elif block['type'] == 'maxpool': 212 | pool_size = int(block['size']) 213 | stride = int(block['stride']) 214 | if stride > 1: 215 | model = nn.MaxPool2d(pool_size, stride) 216 | else: 217 | model = MaxPoolStride1() 218 | out_filters.append(prev_filters) 219 | prev_stride = stride * prev_stride 220 | out_strides.append(prev_stride) 221 | models.append(model) 222 | elif 
block['type'] == 'avgpool': 223 | model = GlobalAvgPool2d() 224 | out_filters.append(prev_filters) 225 | models.append(model) 226 | elif block['type'] == 'softmax': 227 | model = nn.Softmax() 228 | out_strides.append(prev_stride) 229 | out_filters.append(prev_filters) 230 | models.append(model) 231 | elif block['type'] == 'cost': 232 | if block['_type'] == 'sse': 233 | model = nn.MSELoss(size_average=True) 234 | elif block['_type'] == 'L1': 235 | model = nn.L1Loss(size_average=True) 236 | elif block['_type'] == 'smooth': 237 | model = nn.SmoothL1Loss(size_average=True) 238 | out_filters.append(1) 239 | out_strides.append(prev_stride) 240 | models.append(model) 241 | elif block['type'] == 'reorg': 242 | stride = int(block['stride']) 243 | prev_filters = stride * stride * prev_filters 244 | out_filters.append(prev_filters) 245 | prev_stride = prev_stride * stride 246 | out_strides.append(prev_stride) 247 | models.append(Reorg(stride)) 248 | elif block['type'] == 'upsample': 249 | stride = int(block['stride']) 250 | out_filters.append(prev_filters) 251 | prev_stride = prev_stride / stride 252 | out_strides.append(prev_stride) 253 | # models.append(nn.Upsample(scale_factor=stride, mode='nearest')) 254 | models.append(Upsample(stride)) 255 | elif block['type'] == 'route': 256 | layers = block['layers'].split(',') 257 | ind = len(models) 258 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 259 | if len(layers) == 1: 260 | prev_filters = out_filters[layers[0]] 261 | prev_stride = out_strides[layers[0]] 262 | elif len(layers) == 2: 263 | assert (layers[0] == ind - 1) 264 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 265 | prev_stride = out_strides[layers[0]] 266 | out_filters.append(prev_filters) 267 | out_strides.append(prev_stride) 268 | models.append(EmptyModule()) 269 | elif block['type'] == 'shortcut': 270 | ind = len(models) 271 | prev_filters = out_filters[ind - 1] 272 | out_filters.append(prev_filters) 273 | prev_stride = out_strides[ind - 1] 274 | out_strides.append(prev_stride) 275 | models.append(EmptyModule()) 276 | elif block['type'] == 'connected': 277 | filters = int(block['output']) 278 | if block['activation'] == 'linear': 279 | model = nn.Linear(prev_filters, filters) 280 | elif block['activation'] == 'leaky': 281 | model = nn.Sequential( 282 | nn.Linear(prev_filters, filters), 283 | nn.LeakyReLU(0.1, inplace=True)) 284 | elif block['activation'] == 'relu': 285 | model = nn.Sequential( 286 | nn.Linear(prev_filters, filters), 287 | nn.ReLU(inplace=True)) 288 | prev_filters = filters 289 | out_filters.append(prev_filters) 290 | out_strides.append(prev_stride) 291 | models.append(model) 292 | elif block['type'] == 'region': 293 | region_layer = RegionLayer(use_cuda=self.use_cuda) 294 | anchors = block['anchors'].split(',') 295 | region_layer.anchors = [float(i) for i in anchors] 296 | region_layer.num_classes = int(block['classes']) 297 | region_layer.num_anchors = int(block['num']) 298 | region_layer.anchor_step = len(region_layer.anchors) // region_layer.num_anchors 299 | region_layer.rescore = int(block['rescore']) 300 | region_layer.object_scale = float(block['object_scale']) 301 | region_layer.noobject_scale = float(block['noobject_scale']) 302 | region_layer.class_scale = float(block['class_scale']) 303 | region_layer.coord_scale = float(block['coord_scale']) 304 | region_layer.thresh = float(block['thresh']) 305 | out_filters.append(prev_filters) 306 | out_strides.append(prev_stride) 307 | models.append(region_layer) 308 | elif 
block['type'] == 'yolo': 309 | yolo_layer = YoloLayer(use_cuda=self.use_cuda) 310 | anchors = block['anchors'].split(',') 311 | anchor_mask = block['mask'].split(',') 312 | yolo_layer.anchor_mask = [int(i) for i in anchor_mask] 313 | yolo_layer.anchors = [float(i) for i in anchors] 314 | yolo_layer.num_classes = int(block['classes']) 315 | yolo_layer.num_anchors = int(block['num']) 316 | yolo_layer.anchor_step = len(yolo_layer.anchors) // yolo_layer.num_anchors 317 | try: 318 | yolo_layer.rescore = int(block['rescore']) 319 | except: 320 | pass 321 | yolo_layer.ignore_thresh = float(block['ignore_thresh']) 322 | yolo_layer.truth_thresh = float(block['truth_thresh']) 323 | yolo_layer.stride = prev_stride 324 | yolo_layer.nth_layer = ind 325 | yolo_layer.net_width = self.width 326 | yolo_layer.net_height = self.height 327 | out_filters.append(prev_filters) 328 | out_strides.append(prev_stride) 329 | models.append(yolo_layer) 330 | else: 331 | print('unknown type %s' % (block['type'])) 332 | 333 | return models 334 | 335 | def load_binfile(self, weightfile): 336 | fp = open(weightfile, 'rb') 337 | 338 | version = np.fromfile(fp, count=3, dtype=np.int32) 339 | version = [int(i) for i in version] 340 | if version[0] * 10 + version[1] >= 2 and version[0] < 1000 and version[1] < 1000: 341 | seen = np.fromfile(fp, count=1, dtype=np.int64) 342 | else: 343 | seen = np.fromfile(fp, count=1, dtype=np.int32) 344 | self.header = torch.from_numpy(np.concatenate((version, seen), axis=0)) 345 | self.seen = int(seen) 346 | body = np.fromfile(fp, dtype=np.float32) 347 | fp.close() 348 | return body 349 | 350 | def load_weights(self, weightfile): 351 | buf = self.load_binfile(weightfile) 352 | 353 | start = 0 354 | ind = -2 355 | for block in self.blocks: 356 | if start >= buf.size: 357 | break 358 | ind = ind + 1 359 | if block['type'] == 'net': 360 | continue 361 | elif block['type'] == 'convolutional': 362 | model = self.models[ind] 363 | batch_normalize = int(block['batch_normalize']) 364 | if batch_normalize: 365 | start = load_conv_bn(buf, start, model[0], model[1]) 366 | else: 367 | start = load_conv(buf, start, model[0]) 368 | elif block['type'] == 'connected': 369 | model = self.models[ind] 370 | if block['activation'] != 'linear': 371 | start = load_fc(buf, start, model[0]) 372 | else: 373 | start = load_fc(buf, start, model) 374 | elif block['type'] == 'maxpool': 375 | pass 376 | elif block['type'] == 'reorg': 377 | pass 378 | elif block['type'] == 'upsample': 379 | pass 380 | elif block['type'] == 'route': 381 | pass 382 | elif block['type'] == 'shortcut': 383 | pass 384 | elif block['type'] == 'region': 385 | pass 386 | elif block['type'] == 'yolo': 387 | pass 388 | elif block['type'] == 'avgpool': 389 | pass 390 | elif block['type'] == 'softmax': 391 | pass 392 | elif block['type'] == 'cost': 393 | pass 394 | else: 395 | print('unknown type %s' % (block['type'])) 396 | 397 | def save_weights(self, outfile, cutoff=0): 398 | if cutoff <= 0: 399 | cutoff = len(self.blocks) - 1 400 | 401 | fp = open(outfile, 'wb') 402 | self.header[3] = self.seen 403 | header = np.array(self.header[0:3].numpy(), np.int32) 404 | header.tofile(fp) 405 | if (self.header[0] * 10 + self.header[1]) >= 2: 406 | seen = np.array(self.seen, np.int64) 407 | else: 408 | seen = np.array(self.seen, np.int32) 409 | seen.tofile(fp) 410 | 411 | ind = -1 412 | for blockId in range(1, cutoff + 1): 413 | ind = ind + 1 414 | block = self.blocks[blockId] 415 | if block['type'] == 'convolutional': 416 | model = self.models[ind] 417 | 
batch_normalize = int(block['batch_normalize']) 418 | if batch_normalize: 419 | save_conv_bn(fp, model[0], model[1]) 420 | else: 421 | save_conv(fp, model[0]) 422 | elif block['type'] == 'connected': 423 | model = self.models[ind] 424 | if block['activation'] != 'linear': 425 | save_fc(fp, model[0]) 426 | else: 427 | save_fc(fp, model) 428 | elif block['type'] == 'maxpool': 429 | pass 430 | elif block['type'] == 'reorg': 431 | pass 432 | elif block['type'] == 'upsample': 433 | pass 434 | elif block['type'] == 'route': 435 | pass 436 | elif block['type'] == 'shortcut': 437 | pass 438 | elif block['type'] == 'region': 439 | pass 440 | elif block['type'] == 'yolo': 441 | pass 442 | elif block['type'] == 'avgpool': 443 | pass 444 | elif block['type'] == 'softmax': 445 | pass 446 | elif block['type'] == 'cost': 447 | pass 448 | else: 449 | print('unknown type %s' % (block['type'])) 450 | fp.close() 451 | -------------------------------------------------------------------------------- /detector/YOLO3/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | from PIL import Image, ImageDraw 4 | from yolo_utils import * 5 | from darknet import Darknet 6 | 7 | import cv2 8 | 9 | namesfile = None 10 | 11 | 12 | def detect(cfgfile, weightfile, imgfolder): 13 | m = Darknet(cfgfile) 14 | m.load_weights(weightfile) 15 | print('Loaded weights from %s.' % (weightfile)) 16 | 17 | # if m.num_classes == 20: 18 | # namesfile = 'data/voc.names' 19 | # elif m.num_classes == 80: 20 | # namesfile = 'data/coco.names' 21 | # else: 22 | # namesfile = 'data/names' 23 | 24 | use_cuda = True 25 | if use_cuda: 26 | m.cuda() 27 | 28 | imgfiles = [x for x in os.listdir(imgfolder) if x[-4:] == '.jpg'] 29 | imgfiles.sort() 30 | for imgname in imgfiles: 31 | imgfile = os.path.join(imgfolder, imgname) 32 | 33 | img = Image.open(imgfile).convert('RGB') 34 | sized = img.resize((m.width, m.height)) 35 | 36 | # for i in range(2): 37 | start = time.time() 38 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 39 | finish = time.time() 40 | # if i == 1: 41 | print('%s: Predicted in %f seconds.' % (imgfile, (finish - start))) 42 | 43 | class_names = load_class_names(namesfile) 44 | img = plot_boxes(img, boxes, 'result/{}'.format(os.path.basename(imgfile)), class_names) 45 | img = np.array(img) 46 | cv2.imshow('{}'.format(os.path.basename(imgfolder)), img) 47 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000, 800) 48 | cv2.waitKey(1000) 49 | 50 | 51 | def detect_cv2(cfgfile, weightfile, imgfile): 52 | import cv2 53 | m = Darknet(cfgfile) 54 | 55 | m.print_network() 56 | m.load_weights(weightfile) 57 | print('Loaded weights from %s.' % (weightfile)) 58 | 59 | if m.num_classes == 20: 60 | namesfile = 'data/voc.names' 61 | elif m.num_classes == 80: 62 | namesfile = 'data/coco.names' 63 | else: 64 | namesfile = 'data/names' 65 | 66 | use_cuda = True 67 | if use_cuda: 68 | m.cuda() 69 | 70 | img = cv2.imread(imgfile) 71 | sized = cv2.resize(img, (m.width, m.height)) 72 | sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB) 73 | 74 | for i in range(2): 75 | start = time.time() 76 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 77 | finish = time.time() 78 | if i == 1: 79 | print('%s: Predicted in %f seconds.'
% (imgfile, (finish - start))) 80 | 81 | class_names = load_class_names(namesfile) 82 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 83 | 84 | 85 | def detect_skimage(cfgfile, weightfile, imgfile): 86 | from skimage import io 87 | from skimage.transform import resize 88 | m = Darknet(cfgfile) 89 | 90 | m.print_network() 91 | m.load_weights(weightfile) 92 | print('Loading weights from %s... Done!' % (weightfile)) 93 | 94 | if m.num_classes == 20: 95 | namesfile = 'data/voc.names' 96 | elif m.num_classes == 80: 97 | namesfile = 'data/coco.names' 98 | else: 99 | namesfile = 'data/names' 100 | 101 | use_cuda = True 102 | if use_cuda: 103 | m.cuda() 104 | 105 | img = io.imread(imgfile) 106 | sized = resize(img, (m.width, m.height)) * 255 107 | 108 | for i in range(2): 109 | start = time.time() 110 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 111 | finish = time.time() 112 | if i == 1: 113 | print('%s: Predicted in %f seconds.' % (imgfile, (finish - start))) 114 | 115 | class_names = load_class_names(namesfile) 116 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 117 | 118 | 119 | if __name__ == '__main__': 120 | if len(sys.argv) == 5: 121 | cfgfile = sys.argv[1] 122 | weightfile = sys.argv[2] 123 | imgfolder = sys.argv[3] 124 | cv2.namedWindow('{}'.format(os.path.basename(imgfolder)), cv2.WINDOW_NORMAL) 125 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000, 800) 126 | globals()["namesfile"] = sys.argv[4] 127 | detect(cfgfile, weightfile, imgfolder) 128 | # detect_cv2(cfgfile, weightfile, imgfile) 129 | # detect_skimage(cfgfile, weightfile, imgfile) 130 | else: 131 | print('Usage: ') 132 | print(' python detect.py cfgfile weightfile imgfolder names') 133 | # detect('cfg/tiny-yolo-voc.cfg', 'tiny-yolo-voc.weights', 'data/person.jpg', version=1) 134 | -------------------------------------------------------------------------------- /detector/YOLO3/detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | from .darknet import Darknet 6 | from .yolo_utils import get_all_boxes, nms, post_process, xywh_to_xyxy, xyxy_to_xywh 7 | 8 | 9 | class YOLOv3(object): 10 | def __init__(self, cfgfile, weightfile, namesfile, score_thresh=0.7, conf_thresh=0.01, nms_thresh=0.45, is_xywh=False, use_cuda=True): 11 | # net definition 12 | self.net = Darknet(cfgfile) 13 | self.net.load_weights(weightfile) 14 | print('Loaded weights from %s.' % (weightfile)) 15 | self.device = "cuda" if use_cuda else "cpu" 16 | self.net.eval() 17 | self.net.to(self.device) 18 | 19 | # constants 20 | self.size = self.net.width, self.net.height 21 | self.score_thresh = score_thresh 22 | self.conf_thresh = conf_thresh 23 | self.nms_thresh = nms_thresh 24 | self.use_cuda = use_cuda 25 | self.is_xywh = is_xywh 26 | self.num_classes = self.net.num_classes 27 | self.class_names = self.load_class_names(namesfile) 28 | 29 | def __call__(self, ori_img): 30 | # img to tensor 31 | assert isinstance(ori_img, np.ndarray), "input must be a numpy array!" 32 | img = ori_img.astype(np.float)/255. 
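# Preprocessing: pixel values are scaled to [0, 1], the image is resized to the network
# input size (self.size), reordered HWC -> CHW and given a batch dimension before the
# forward pass. Note that np.float above is a deprecated alias of the builtin float
# (removed in NumPy 1.24), so newer environments may need np.float32 here.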
33 | 34 | img = cv2.resize(img, self.size) 35 | img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0) 36 | 37 | # forward 38 | with torch.no_grad(): 39 | img = img.to(self.device) 40 | out_boxes = self.net(img) 41 | boxes = get_all_boxes(out_boxes, self.conf_thresh, self.num_classes, use_cuda=self.use_cuda) #batch size is 1 42 | # boxes = nms(boxes, self.nms_thresh) 43 | # nms嵌入到下面的处理函数中 44 | boxes = post_process(boxes, self.net.num_classes, self.conf_thresh, self.nms_thresh)[0].cpu() 45 | boxes = boxes[boxes[:, -2]> self.score_thresh, :] # bbox xmin ymin xmax ymax 46 | 47 | if len(boxes) == 0: 48 | return None, None, None 49 | 50 | height, width = ori_img.shape[:2] 51 | bbox = boxes[:,:4] 52 | if self.is_xywh: 53 | # bbox x y w h 54 | bbox = xyxy_to_xywh(bbox) 55 | 56 | bbox = bbox * torch.FloatTensor([[width, height, width, height]]) 57 | cls_conf = boxes[:,5] 58 | cls_ids = boxes[:,6].long() 59 | return bbox.numpy(), cls_conf.numpy(), cls_ids.numpy() 60 | 61 | def load_class_names(self,namesfile): 62 | with open(namesfile, 'r', encoding='utf8') as fp: 63 | class_names = [line.strip() for line in fp.readlines()] 64 | return class_names 65 | -------------------------------------------------------------------------------- /detector/YOLO3/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms import boxes_nms -------------------------------------------------------------------------------- /detector/YOLO3/nms/nms.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torchvision 3 | 4 | try: 5 | import torch 6 | import torch_extension 7 | 8 | _nms = torch_extension.nms 9 | except ImportError: 10 | if torchvision.__version__ >= '0.3.0': 11 | _nms = torchvision.ops.nms 12 | else: 13 | from .python_nms import python_nms 14 | 15 | _nms = python_nms 16 | warnings.warn('You are using python version NMS, which is very very slow. Try compile c++ NMS ' 17 | 'using `cd ext & python build.py build_ext develop`') 18 | 19 | 20 | def boxes_nms(boxes, scores, nms_thresh, max_count=-1): 21 | """ Performs non-maximum suppression, run on GPU or CPU according to 22 | boxes's device. 23 | Args: 24 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4) 25 | scores(Tensor): scores, shape is (n, ) 26 | nms_thresh(float): thresh 27 | max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression 28 | Returns: 29 | indices kept. 30 | """ 31 | keep = _nms(boxes, scores, nms_thresh) 32 | if max_count > 0: 33 | keep = keep[:max_count] 34 | return keep 35 | -------------------------------------------------------------------------------- /detector/YOLO3/nms/python_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def python_nms(boxes, scores, nms_thresh): 6 | if boxes.numel() == 0: 7 | return torch.empty((0,), dtype=torch.long) 8 | # Use numpy to run nms. Running nms in PyTorch code on CPU is really slow. 
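# Greedy NMS: candidates are visited in descending score order, and every remaining box
# whose IoU with an already-kept box reaches nms_thresh is flagged in the boolean
# `suppressed` mask; only the unsuppressed indices are returned, on the original device.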
9 | origin_device = boxes.device 10 | cpu_device = torch.device('cpu') 11 | boxes = boxes.to(cpu_device).numpy() 12 | scores = scores.to(cpu_device).numpy() 13 | 14 | x1 = boxes[:, 0] 15 | y1 = boxes[:, 1] 16 | x2 = boxes[:, 2] 17 | y2 = boxes[:, 3] 18 | areas = (x2 - x1) * (y2 - y1) 19 | order = np.argsort(scores)[::-1] 20 | num_detections = boxes.shape[0] 21 | suppressed = np.zeros((num_detections,), dtype=np.bool) 22 | for _i in range(num_detections): 23 | i = order[_i] 24 | if suppressed[i]: 25 | continue 26 | ix1 = x1[i] 27 | iy1 = y1[i] 28 | ix2 = x2[i] 29 | iy2 = y2[i] 30 | iarea = areas[i] 31 | 32 | for _j in range(_i + 1, num_detections): 33 | j = order[_j] 34 | if suppressed[j]: 35 | continue 36 | 37 | xx1 = max(ix1, x1[j]) 38 | yy1 = max(iy1, y1[j]) 39 | xx2 = min(ix2, x2[j]) 40 | yy2 = min(iy2, y2[j]) 41 | w = max(0, xx2 - xx1) 42 | h = max(0, yy2 - yy1) 43 | 44 | inter = w * h 45 | ovr = inter / (iarea + areas[j] - inter) 46 | if ovr >= nms_thresh: 47 | suppressed[j] = True 48 | keep = np.nonzero(suppressed == 0)[0] 49 | keep = torch.from_numpy(keep).to(origin_device) 50 | return keep 51 | -------------------------------------------------------------------------------- /detector/YOLO3/region_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import sys 4 | import time 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 9 | 10 | class RegionLayer(nn.Module): 11 | def __init__(self, num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 12 | super(RegionLayer, self).__init__() 13 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 14 | self.device = torch.device("cuda" if use_cuda else "cpu") 15 | self.num_classes = num_classes 16 | self.num_anchors = num_anchors 17 | self.anchor_step = len(anchors)//num_anchors 18 | #self.anchors = torch.stack(torch.FloatTensor(anchors).split(self.anchor_step)).to(self.device) 19 | self.anchors = torch.FloatTensor(anchors).view(self.num_anchors, self.anchor_step).to(self.device) 20 | self.rescore = 1 21 | self.coord_scale = 1 22 | self.noobject_scale = 1 23 | self.object_scale = 5 24 | self.class_scale = 1 25 | self.thresh = 0.6 26 | self.seen = 0 27 | 28 | def build_targets(self, pred_boxes, target, nH, nW): 29 | nB = target.size(0) 30 | nA = self.num_anchors 31 | conf_mask = torch.ones (nB, nA, nH, nW) * self.noobject_scale 32 | coord_mask = torch.zeros(nB, nA, nH, nW) 33 | cls_mask = torch.zeros(nB, nA, nH, nW) 34 | tcoord = torch.zeros( 4, nB, nA, nH, nW) 35 | tconf = torch.zeros(nB, nA, nH, nW) 36 | tcls = torch.zeros(nB, nA, nH, nW) 37 | 38 | nAnchors = nA*nH*nW 39 | nPixels = nH*nW 40 | nGT = 0 # number of ground truth 41 | nRecall = 0 42 | # it works faster on CPU than on GPU. 
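        # Annotation (not in the original source): build_targets assigns every ground-truth box
        # to the grid cell holding its centre and to the anchor with the best shape IoU, then
        # fills the per-cell masks and encoded regression targets:
        #     tx = gx - gi,  ty = gy - gj,  tw = log(gw / anchor_w),  th = log(gh / anchor_h)
        # Cells whose prediction already overlaps some ground truth above `thresh` get
        # conf_mask = 0 and are excluded from the no-object confidence loss.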
43 | anchors = self.anchors.to("cpu") 44 | 45 | if self.seen < 12800: 46 | tcoord[0].fill_(0.5) 47 | tcoord[1].fill_(0.5) 48 | coord_mask.fill_(1) 49 | 50 | for b in range(nB): 51 | cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t() 52 | cur_ious = torch.zeros(nAnchors) 53 | tbox = target[b].view(-1,5).to("cpu") 54 | for t in range(50): 55 | if tbox[t][1] == 0: 56 | break 57 | gx, gw = [ i * nW for i in (tbox[t][1], tbox[t][3]) ] 58 | gy, gh = [ i * nH for i in (tbox[t][2], tbox[t][4]) ] 59 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors,1).t() 60 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 61 | ignore_ix = cur_ious>self.thresh 62 | conf_mask[b][ignore_ix.view(nA,nH,nW)] = 0 63 | 64 | for t in range(50): 65 | if tbox[t][1] == 0: 66 | break 67 | nGT += 1 68 | gx, gw = [ i * nW for i in (tbox[t][1], tbox[t][3]) ] 69 | gy, gh = [ i * nH for i in (tbox[t][2], tbox[t][4]) ] 70 | gw, gh = gw.float(), gh.float() 71 | gi, gj = int(gx), int(gy) 72 | 73 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA,1).t() 74 | anchor_boxes = torch.cat((torch.zeros(nA, 2), anchors),1).t() 75 | tmp_ious = multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False) 76 | best_iou, best_n = torch.max(tmp_ious, 0) 77 | 78 | if self.anchor_step == 4: # this part is not tested. 79 | tmp_ious_mask = (tmp_ious==best_iou) 80 | if tmp_ious_mask.sum() > 0: 81 | gt_pos = torch.FloatTensor([gi, gj, gx, gy]).repeat(nA,1).t() 82 | an_pos = anchor_boxes[4:6] # anchor_boxes are consisted of [0 0 aw ah ax ay] 83 | dist = pow(((gt_pos[0]+an_pos[0])-gt_pos[2]),2) + pow(((gt_pos[1]+an_pos[1])-gt_pos[3]),2) 84 | dist[1-tmp_ious_mask]=10000 # set the large number for the small ious 85 | _, best_n = torch.min(dist,0) 86 | 87 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 88 | pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi] 89 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 90 | 91 | coord_mask[b][best_n][gj][gi] = 1 92 | cls_mask [b][best_n][gj][gi] = 1 93 | conf_mask [b][best_n][gj][gi] = self.object_scale 94 | tcoord [0][b][best_n][gj][gi] = gx - gi 95 | tcoord [1][b][best_n][gj][gi] = gy - gj 96 | tcoord [2][b][best_n][gj][gi] = math.log(gw/anchors[best_n][0]) 97 | tcoord [3][b][best_n][gj][gi] = math.log(gh/anchors[best_n][1]) 98 | tcls [b][best_n][gj][gi] = tbox[t][0] 99 | tconf [b][best_n][gj][gi] = iou if self.rescore else 1. 
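                # Annotation (not in the original source): with rescore enabled (the default
                # here) the confidence target is the live IoU between the matched prediction
                # and the ground-truth box, i.e. YOLOv2's "rescore" behaviour; otherwise a
                # fixed target of 1.0 is used. nRecall below counts assignments with IoU > 0.5.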
100 | if iou > 0.5: 101 | nRecall += 1 102 | 103 | return nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 104 | 105 | def get_mask_boxes(self, output): 106 | if not isinstance(self.anchors, torch.Tensor): 107 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 108 | masked_anchors = self.anchors.view(-1) 109 | num_anchors = torch.IntTensor([self.num_anchors]).to(self.device) 110 | return {'x':output, 'a':masked_anchors, 'n':num_anchors} 111 | 112 | def forward(self, output, target): 113 | #output : BxAs*(4+1+num_classes)*H*W 114 | t0 = time.time() 115 | nB = output.data.size(0) # batch size 116 | nA = self.num_anchors 117 | nC = self.num_classes 118 | nH = output.data.size(2) 119 | nW = output.data.size(3) 120 | cls_anchor_dim = nB*nA*nH*nW 121 | 122 | if not isinstance(self.anchors, torch.Tensor): 123 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 124 | 125 | output = output.view(nB, nA, (5+nC), nH, nW) 126 | cls_grid = torch.linspace(5,5+nC-1,nC).long().to(self.device) 127 | ix = torch.LongTensor(range(0,5)).to(self.device) 128 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 129 | 130 | coord = output.index_select(2, ix[0:4]).view(nB*nA, -1, nH*nW).transpose(0,1).contiguous().view(-1,cls_anchor_dim) # x, y, w, h 131 | coord[0:2] = coord[0:2].sigmoid() # x, y 132 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 133 | cls = output.index_select(2, cls_grid) 134 | cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(cls_anchor_dim, nC) 135 | 136 | t1 = time.time() 137 | grid_x = torch.linspace(0, nW-1, nW).repeat(nB*nA, nH, 1).view(cls_anchor_dim).to(self.device) 138 | grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(cls_anchor_dim).to(self.device) 139 | anchor_w = self.anchors.index_select(1, ix[0]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 140 | anchor_h = self.anchors.index_select(1, ix[1]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 141 | 142 | pred_boxes[0] = coord[0] + grid_x 143 | pred_boxes[1] = coord[1] + grid_y 144 | pred_boxes[2] = coord[2].exp() * anchor_w 145 | pred_boxes[3] = coord[3].exp() * anchor_h 146 | # for build_targets. it works faster on CPU than on GPU 147 | pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)).detach() 148 | 149 | t2 = time.time() 150 | nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 151 | self.build_targets(pred_boxes, target.detach(), nH, nW) 152 | 153 | cls_mask = (cls_mask == 1) 154 | tcls = tcls[cls_mask].long().view(-1) 155 | cls_mask = cls_mask.view(-1, 1).repeat(1,nC).to(self.device) 156 | cls = cls[cls_mask].view(-1, nC) 157 | 158 | nProposals = int((conf > 0.25).sum()) 159 | 160 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 161 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 162 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device) 163 | 164 | t3 = time.time() 165 | loss_coord = self.coord_scale * nn.MSELoss(size_average=False)(coord*coord_mask, tcoord*coord_mask)/2 166 | # sqrt(object_scale)/2 is almost equal to 1. 
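        # Annotation (not in the original source): conf_mask holds noobject_scale (=1) for
        # background cells and object_scale (=5) for matched cells, and was passed through
        # .sqrt() above; since MSELoss squares its inputs, each cell's squared confidence error
        # is effectively re-weighted by the original scale (5 vs 1). The remark above simply
        # notes that sqrt(object_scale)/2 = sqrt(5)/2 ≈ 1.12, i.e. close to 1. Also note that
        # `size_average=False` is deprecated in recent PyTorch; `reduction='sum'` is equivalent.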
167 | loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask)/2 168 | loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 169 | loss = loss_coord + loss_conf + loss_cls 170 | t4 = time.time() 171 | if False: 172 | print('-'*30) 173 | print(' activation : %f' % (t1 - t0)) 174 | print(' create pred_boxes : %f' % (t2 - t1)) 175 | print(' build targets : %f' % (t3 - t2)) 176 | print(' create loss : %f' % (t4 - t3)) 177 | print(' total : %f' % (t4 - t0)) 178 | print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 179 | % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss)) 180 | if math.isnan(loss.item()): 181 | print(conf, tconf) 182 | sys.exit(0) 183 | return loss 184 | -------------------------------------------------------------------------------- /detector/YOLO3/weight/tips.txt: -------------------------------------------------------------------------------- 1 | download yolo3 weights to this folder from official website -------------------------------------------------------------------------------- /detector/YOLO3/yolo_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import sys 4 | import time 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 9 | 10 | class YoloLayer(nn.Module): 11 | def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 12 | super(YoloLayer, self).__init__() 13 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 14 | self.device = torch.device("cuda" if use_cuda else "cpu") 15 | 16 | self.anchor_mask = anchor_mask 17 | self.num_classes = num_classes 18 | self.anchors = anchors 19 | self.num_anchors = num_anchors 20 | self.anchor_step = len(anchors)//num_anchors 21 | self.rescore = 0 22 | self.ignore_thresh = 0.5 23 | self.truth_thresh = 1. 24 | self.stride = 32 25 | self.nth_layer = 0 26 | self.seen = 0 27 | self.net_width = 0 28 | self.net_height = 0 29 | 30 | def get_mask_boxes(self, output): 31 | masked_anchors = [] 32 | for m in self.anchor_mask: 33 | masked_anchors += self.anchors[m*self.anchor_step:(m+1)*self.anchor_step] 34 | masked_anchors = [anchor/self.stride for anchor in masked_anchors] 35 | 36 | masked_anchors = torch.FloatTensor(masked_anchors).to(self.device) 37 | num_anchors = torch.IntTensor([len(self.anchor_mask)]).to(self.device) 38 | return {'x':output, 'a':masked_anchors, 'n':num_anchors} 39 | 40 | def build_targets(self, pred_boxes, target, anchors, nA, nH, nW): 41 | nB = target.size(0) 42 | anchor_step = anchors.size(1) # anchors[nA][anchor_step] 43 | conf_mask = torch.ones (nB, nA, nH, nW) 44 | coord_mask = torch.zeros(nB, nA, nH, nW) 45 | cls_mask = torch.zeros(nB, nA, nH, nW) 46 | tcoord = torch.zeros( 4, nB, nA, nH, nW) 47 | tconf = torch.zeros(nB, nA, nH, nW) 48 | tcls = torch.zeros(nB, nA, nH, nW) 49 | twidth, theight = self.net_width/self.stride, self.net_height/self.stride 50 | 51 | nAnchors = nA*nH*nW 52 | nPixels = nH*nW 53 | nGT = 0 54 | nRecall = 0 55 | nRecall75 = 0 56 | 57 | # it works faster on CPU than on GPU. 
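        # Annotation (not in the original source): this mirrors RegionLayer.build_targets but
        # for a single YOLOv3 detection scale — only the anchors selected by anchor_mask
        # (already divided by the layer stride in get_mask_boxes) are considered, predictions
        # overlapping any ground truth above ignore_thresh are dropped from the no-object
        # loss, and recall is tracked at both 0.5 and 0.75 IoU.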
58 | anchors = anchors.to("cpu") 59 | 60 | for b in range(nB): 61 | cur_pred_boxes = pred_boxes[b*nAnchors:(b+1)*nAnchors].t() 62 | cur_ious = torch.zeros(nAnchors) 63 | tbox = target[b].view(-1,5).to("cpu") 64 | for t in range(50): 65 | if tbox[t][1] == 0: 66 | break 67 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 68 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 69 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors,1).t() 70 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 71 | ignore_ix = cur_ious>self.ignore_thresh 72 | conf_mask[b][ignore_ix.view(nA,nH,nW)] = 0 73 | 74 | for t in range(50): 75 | if tbox[t][1] == 0: 76 | break 77 | nGT += 1 78 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 79 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 80 | gw, gh = gw.float(), gh.float() 81 | gi, gj = int(gx), int(gy) 82 | 83 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA,1).t() 84 | anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors),1).t() 85 | _, best_n = torch.max(multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False), 0) 86 | 87 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 88 | pred_box = pred_boxes[b*nAnchors+best_n*nPixels+gj*nW+gi] 89 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 90 | 91 | coord_mask[b][best_n][gj][gi] = 1 92 | cls_mask [b][best_n][gj][gi] = 1 93 | conf_mask [b][best_n][gj][gi] = 1 94 | tcoord [0][b][best_n][gj][gi] = gx - gi 95 | tcoord [1][b][best_n][gj][gi] = gy - gj 96 | tcoord [2][b][best_n][gj][gi] = math.log(gw/anchors[best_n][0]) 97 | tcoord [3][b][best_n][gj][gi] = math.log(gh/anchors[best_n][1]) 98 | tcls [b][best_n][gj][gi] = tbox[t][0] 99 | tconf [b][best_n][gj][gi] = iou if self.rescore else 1. 100 | 101 | if iou > 0.5: 102 | nRecall += 1 103 | if iou > 0.75: 104 | nRecall75 += 1 105 | 106 | return nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 107 | 108 | def forward(self, output, target): 109 | #output : BxAs*(4+1+num_classes)*H*W 110 | mask_tuple = self.get_mask_boxes(output) 111 | t0 = time.time() 112 | nB = output.data.size(0) # batch size 113 | nA = mask_tuple['n'].item() # num_anchors 114 | nC = self.num_classes 115 | nH = output.data.size(2) 116 | nW = output.data.size(3) 117 | anchor_step = mask_tuple['a'].size(0)//nA 118 | anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device) 119 | cls_anchor_dim = nB*nA*nH*nW 120 | 121 | output = output.view(nB, nA, (5+nC), nH, nW) 122 | cls_grid = torch.linspace(5,5+nC-1,nC).long().to(self.device) 123 | ix = torch.LongTensor(range(0,5)).to(self.device) 124 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 125 | 126 | coord = output.index_select(2, ix[0:4]).view(nB*nA, -1, nH*nW).transpose(0,1).contiguous().view(-1,cls_anchor_dim) # x, y, w, h 127 | coord[0:2] = coord[0:2].sigmoid() # x, y 128 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 129 | cls = output.index_select(2, cls_grid) 130 | cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(cls_anchor_dim, nC) 131 | 132 | t1 = time.time() 133 | grid_x = torch.linspace(0, nW-1, nW).repeat(nB*nA, nH, 1).view(cls_anchor_dim).to(self.device) 134 | grid_y = torch.linspace(0, nH-1, nH).repeat(nW,1).t().repeat(nB*nA, 1, 1).view(cls_anchor_dim).to(self.device) 135 | anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 136 | anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB*nH*nW).view(cls_anchor_dim) 137 | 138 | pred_boxes[0] = coord[0] 
+ grid_x 139 | pred_boxes[1] = coord[1] + grid_y 140 | pred_boxes[2] = coord[2].exp() * anchor_w 141 | pred_boxes[3] = coord[3].exp() * anchor_h 142 | # for build_targets. it works faster on CPU than on GPU 143 | pred_boxes = convert2cpu(pred_boxes.transpose(0,1).contiguous().view(-1,4)).detach() 144 | 145 | t2 = time.time() 146 | nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 147 | self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW) 148 | 149 | cls_mask = (cls_mask == 1) 150 | tcls = tcls[cls_mask].long().view(-1) 151 | cls_mask = cls_mask.view(-1, 1).repeat(1,nC).to(self.device) 152 | cls = cls[cls_mask].view(-1, nC) 153 | 154 | nProposals = int((conf > 0.25).sum()) 155 | 156 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 157 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 158 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.to(self.device) 159 | 160 | t3 = time.time() 161 | loss_coord = nn.MSELoss(size_average=False)(coord*coord_mask, tcoord*coord_mask)/2 162 | loss_conf = nn.MSELoss(size_average=False)(conf*conf_mask, tconf*conf_mask) 163 | loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 164 | loss = loss_coord + loss_conf + loss_cls 165 | 166 | t4 = time.time() 167 | if False: 168 | print('-'*30) 169 | print(' activation : %f' % (t1 - t0)) 170 | print(' create pred_boxes : %f' % (t2 - t1)) 171 | print(' build targets : %f' % (t3 - t2)) 172 | print(' create loss : %f' % (t4 - t3)) 173 | print(' total : %f' % (t4 - t0)) 174 | print('%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 175 | % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals, loss_coord, loss_conf, loss_cls, loss)) 176 | if math.isnan(loss.item()): 177 | print(conf, tconf) 178 | sys.exit(0) 179 | return loss 180 | -------------------------------------------------------------------------------- /detector/YOLO3/yolo_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import math 4 | import torch 5 | import numpy as np 6 | from PIL import Image, ImageDraw, ImageFont 7 | import struct # get_image_size 8 | import imghdr # get_image_size 9 | from .nms import boxes_nms 10 | 11 | 12 | def sigmoid(x): 13 | return 1.0 / (math.exp(-x) + 1.) 
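# Annotation (not in the original source): these scalar helpers are legacy utilities; the
# vectorised hot path below uses torch.sigmoid and torch.nn.Softmax directly. A quick sanity
# check, assuming PyTorch is installed:
#     >>> import torch
#     >>> abs(sigmoid(0.5) - torch.sigmoid(torch.tensor(0.5)).item()) < 1e-6
#     True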
14 | 15 | 16 | def softmax(x): 17 | x = torch.exp(x - torch.max(x)) 18 | x = x / x.sum() 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | if x1y1x2y2: 24 | x1_min = min(box1[0], box2[0]) 25 | x2_max = max(box1[2], box2[2]) 26 | y1_min = min(box1[1], box2[1]) 27 | y2_max = max(box1[3], box2[3]) 28 | w1, h1 = box1[2] - box1[0], box1[3] - box1[1] 29 | w2, h2 = box2[2] - box2[0], box2[3] - box2[1] 30 | else: 31 | w1, h1 = box1[2], box1[3] 32 | w2, h2 = box2[2], box2[3] 33 | x1_min = min(box1[0] - w1 / 2.0, box2[0] - w2 / 2.0) 34 | x2_max = max(box1[0] + w1 / 2.0, box2[0] + w2 / 2.0) 35 | y1_min = min(box1[1] - h1 / 2.0, box2[1] - h2 / 2.0) 36 | y2_max = max(box1[1] + h1 / 2.0, box2[1] + h2 / 2.0) 37 | 38 | w_union = x2_max - x1_min 39 | h_union = y2_max - y1_min 40 | w_cross = w1 + w2 - w_union 41 | h_cross = h1 + h2 - h_union 42 | carea = 0 43 | if w_cross <= 0 or h_cross <= 0: 44 | return 0.0 45 | 46 | area1 = w1 * h1 47 | area2 = w2 * h2 48 | carea = w_cross * h_cross 49 | uarea = area1 + area2 - carea 50 | return float(carea / uarea) 51 | 52 | 53 | def multi_bbox_ious(boxes1, boxes2, x1y1x2y2=True): 54 | if x1y1x2y2: 55 | x1_min = torch.min(boxes1[0], boxes2[0]) 56 | x2_max = torch.max(boxes1[2], boxes2[2]) 57 | y1_min = torch.min(boxes1[1], boxes2[1]) 58 | y2_max = torch.max(boxes1[3], boxes2[3]) 59 | w1, h1 = boxes1[2] - boxes1[0], boxes1[3] - boxes1[1] 60 | w2, h2 = boxes2[2] - boxes2[0], boxes2[3] - boxes2[1] 61 | else: 62 | w1, h1 = boxes1[2], boxes1[3] 63 | w2, h2 = boxes2[2], boxes2[3] 64 | x1_min = torch.min(boxes1[0] - w1 / 2.0, boxes2[0] - w2 / 2.0) 65 | x2_max = torch.max(boxes1[0] + w1 / 2.0, boxes2[0] + w2 / 2.0) 66 | y1_min = torch.min(boxes1[1] - h1 / 2.0, boxes2[1] - h2 / 2.0) 67 | y2_max = torch.max(boxes1[1] + h1 / 2.0, boxes2[1] + h2 / 2.0) 68 | 69 | w_union = x2_max - x1_min 70 | h_union = y2_max - y1_min 71 | w_cross = w1 + w2 - w_union 72 | h_cross = h1 + h2 - h_union 73 | mask = (((w_cross <= 0) + (h_cross <= 0)) > 0) 74 | area1 = w1 * h1 75 | area2 = w2 * h2 76 | carea = w_cross * h_cross 77 | carea[mask] = 0 78 | uarea = area1 + area2 - carea 79 | return carea / uarea 80 | 81 | 82 | def post_process(boxes, num_classes, conf_thresh=0.01, nms_thresh=0.45, obj_thresh=0.3): 83 | batch_size = boxes.size(0) 84 | 85 | # nms 86 | results_boxes = [] 87 | for batch_id in range(batch_size): 88 | processed_boxes = [] 89 | for cls_id in range(num_classes): 90 | mask = (boxes[batch_id, :, -1] == cls_id) * (boxes[batch_id, :, 4] > obj_thresh) 91 | masked_boxes = boxes[batch_id, mask] 92 | 93 | keep = boxes_nms(masked_boxes[:, :4], masked_boxes[:, 5], nms_thresh) 94 | 95 | nmsed_boxes = masked_boxes[keep, :] 96 | 97 | processed_boxes.append(nmsed_boxes) 98 | processed_boxes = torch.cat(processed_boxes, dim=0) 99 | 100 | results_boxes.append(processed_boxes) 101 | 102 | return results_boxes 103 | 104 | 105 | def xywh_to_xyxy(boxes_xywh): 106 | boxes_xyxy = boxes_xywh.copy() 107 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 108 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 109 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 110 | boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2. 111 | 112 | return boxes_xyxy 113 | 114 | 115 | def xyxy_to_xywh(boxes_xyxy): 116 | if isinstance(boxes_xyxy, torch.Tensor): 117 | boxes_xywh = boxes_xyxy.clone() 118 | elif isinstance(boxes_xyxy, np.ndarray): 119 | boxes_xywh = boxes_xyxy.copy() 120 | 121 | boxes_xywh[:, 0] = (boxes_xyxy[:, 0] + boxes_xyxy[:, 2]) / 2. 
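    # Annotation (not in the original source): the conversion continues below with the
    # centre-y, width and height columns. Caution: the companion xywh_to_xyxy() defined above
    # appears to overwrite only column 0 four times; a corrected sketch, assuming the usual
    # (cx, cy, w, h) -> (x1, y1, x2, y2) convention, would be:
    #     boxes_xyxy[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2.
    #     boxes_xyxy[:, 1] = boxes_xywh[:, 1] - boxes_xywh[:, 3] / 2.
    #     boxes_xyxy[:, 2] = boxes_xywh[:, 0] + boxes_xywh[:, 2] / 2.
    #     boxes_xyxy[:, 3] = boxes_xywh[:, 1] + boxes_xywh[:, 3] / 2.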
122 | boxes_xywh[:, 1] = (boxes_xyxy[:, 1] + boxes_xyxy[:, 3]) / 2. 123 | boxes_xywh[:, 2] = boxes_xyxy[:, 2] - boxes_xyxy[:, 0] 124 | boxes_xywh[:, 3] = boxes_xyxy[:, 3] - boxes_xyxy[:, 1] 125 | 126 | return boxes_xywh 127 | 128 | 129 | def nms(boxes, nms_thresh): 130 | if len(boxes) == 0: 131 | return boxes 132 | 133 | det_confs = torch.zeros(len(boxes)) 134 | print(boxes.shape) 135 | for i in range(len(boxes)): 136 | det_confs[i] = boxes[i][4] 137 | 138 | _, sortIds = torch.sort(det_confs, descending=True) 139 | out_boxes = [] 140 | for i in range(len(boxes)): 141 | box_i = boxes[sortIds[i]] 142 | if box_i[4] > 0: 143 | out_boxes.append(box_i) 144 | for j in range(i + 1, len(boxes)): 145 | box_j = boxes[sortIds[j]] 146 | if bbox_iou(box_i, box_j, x1y1x2y2=False) > nms_thresh: 147 | box_j[4] = 0 148 | return out_boxes 149 | 150 | 151 | def convert2cpu(gpu_matrix): 152 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 153 | 154 | 155 | def convert2cpu_long(gpu_matrix): 156 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 157 | 158 | 159 | def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): 160 | # total number of inputs (batch size) 161 | # first element (x) for first tuple (x, anchor_mask, num_anchor) 162 | batchsize = output[0]['x'].data.size(0) 163 | 164 | all_boxes = [] 165 | for i in range(len(output)): 166 | pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() 167 | boxes = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, 168 | only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) 169 | 170 | all_boxes.append(boxes) 171 | return torch.cat(all_boxes, dim=1) 172 | 173 | 174 | def get_region_boxes(output, obj_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, 175 | use_cuda=True): 176 | device = torch.device("cuda" if use_cuda else "cpu") 177 | anchors = anchors.to(device) 178 | anchor_step = anchors.size(0) // num_anchors 179 | if output.dim() == 3: 180 | output = output.unsqueeze(0) 181 | batch = output.size(0) 182 | assert (output.size(1) == (5 + num_classes) * num_anchors) 183 | h = output.size(2) 184 | w = output.size(3) 185 | cls_anchor_dim = batch * num_anchors * h * w 186 | 187 | # all_boxes = [] 188 | output = output.view(batch * num_anchors, 5 + num_classes, h * w).transpose(0, 1).contiguous().view(5 + num_classes, 189 | cls_anchor_dim) 190 | 191 | grid_x = torch.linspace(0, w - 1, w).repeat(batch * num_anchors, h, 1).view(cls_anchor_dim).to(device) 192 | grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(batch * num_anchors, 1, 1).view(cls_anchor_dim).to( 193 | device) 194 | ix = torch.LongTensor(range(0, 2)).to(device) 195 | anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h * w).view( 196 | cls_anchor_dim) 197 | anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, batch, h * w).view( 198 | cls_anchor_dim) 199 | 200 | xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y 201 | ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() 202 | det_confs = torch.sigmoid(output[4]) 203 | 204 | # by ysyun, dim=1 means input is 2D or even dimension else dim=0 205 | cls_confs = torch.nn.Softmax(dim=1)(output[5:5 + num_classes].transpose(0, 1)).detach() 206 | cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 207 | cls_max_confs = cls_max_confs.view(-1) 208 | 
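    # Annotation (not in the original source): the argmax class id is kept as a float tensor
    # below so it can be stacked with the box coordinates and confidences into one
    # (batch, num_anchors*h*w, 7) tensor: [x1, y1, x2, y2, det_conf, det_conf*cls_conf, cls_id].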
cls_max_ids = cls_max_ids.view(-1).float() 209 | 210 | # sz_hw = h*w 211 | # sz_hwa = sz_hw*num_anchors 212 | # det_confs = convert2cpu(det_confs) 213 | # cls_max_confs = convert2cpu(cls_max_confs) 214 | # cls_max_ids = convert2cpu_long(cls_max_ids) 215 | # xs, ys = convert2cpu(xs), convert2cpu(ys) 216 | # ws, hs = convert2cpu(ws), convert2cpu(hs) 217 | 218 | cls_confs = det_confs * cls_max_confs 219 | 220 | # boxes = [xs/w, ys/h, ws/w, hs/h, det_confs, cls_confs, cls_max_ids] 221 | xs, ys, ws, hs = xs / w, ys / h, ws / w, hs / h 222 | x1, y1, x2, y2 = torch.clamp_min(xs - ws / 2., 0.), torch.clamp_min(ys - hs / 2., 0.), torch.clamp_max(xs + ws / 2., 223 | 1.), torch.clamp_max( 224 | ys + hs / 2., 1.) 225 | boxes = [x1, y1, x2, y2, det_confs, cls_confs, cls_max_ids] 226 | boxes = list(map(lambda x: x.view(batch, -1), boxes)) 227 | boxes = torch.stack(boxes, dim=2) 228 | 229 | # for b in range(batch): 230 | # boxes = [] 231 | # for cy in range(h): 232 | # for cx in range(w): 233 | # for i in range(num_anchors): 234 | # ind = b*sz_hwa + i*sz_hw + cy*w + cx 235 | # det_conf = det_confs[ind] 236 | # if only_objectness: 237 | # conf = det_confs[ind] 238 | # else: 239 | # conf = det_confs[ind] * cls_max_confs[ind] 240 | 241 | # if conf > conf_thresh: 242 | # bcx = xs[ind] 243 | # bcy = ys[ind] 244 | # bw = ws[ind] 245 | # bh = hs[ind] 246 | # cls_max_conf = cls_max_confs[ind] 247 | # cls_max_id = cls_max_ids[ind] 248 | # box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 249 | 250 | # boxes.append(box) 251 | # all_boxes.append(boxes) 252 | return boxes 253 | 254 | 255 | # def get_all_boxes(output, conf_thresh, num_classes, only_objectness=1, validation=False, use_cuda=True): 256 | # # total number of inputs (batch size) 257 | # # first element (x) for first tuple (x, anchor_mask, num_anchor) 258 | # tot = output[0]['x'].data.size(0) 259 | # all_boxes = [[] for i in range(tot)] 260 | # for i in range(len(output)): 261 | # pred, anchors, num_anchors = output[i]['x'].data, output[i]['a'], output[i]['n'].item() 262 | # b = get_region_boxes(pred, conf_thresh, num_classes, anchors, num_anchors, \ 263 | # only_objectness=only_objectness, validation=validation, use_cuda=use_cuda) 264 | # for t in range(tot): 265 | # all_boxes[t] += b[t] 266 | # return all_boxes 267 | 268 | # def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors, only_objectness=1, validation=False, use_cuda=True): 269 | # device = torch.device("cuda" if use_cuda else "cpu") 270 | # anchors = anchors.to(device) 271 | # anchor_step = anchors.size(0)//num_anchors 272 | # if output.dim() == 3: 273 | # output = output.unsqueeze(0) 274 | # batch = output.size(0) 275 | # assert(output.size(1) == (5+num_classes)*num_anchors) 276 | # h = output.size(2) 277 | # w = output.size(3) 278 | # cls_anchor_dim = batch*num_anchors*h*w 279 | 280 | # t0 = time.time() 281 | # all_boxes = [] 282 | # output = output.view(batch*num_anchors, 5+num_classes, h*w).transpose(0,1).contiguous().view(5+num_classes, cls_anchor_dim) 283 | 284 | # grid_x = torch.linspace(0, w-1, w).repeat(batch*num_anchors, h, 1).view(cls_anchor_dim).to(device) 285 | # grid_y = torch.linspace(0, h-1, h).repeat(w,1).t().repeat(batch*num_anchors, 1, 1).view(cls_anchor_dim).to(device) 286 | # ix = torch.LongTensor(range(0,2)).to(device) 287 | # anchor_w = anchors.view(num_anchors, anchor_step).index_select(1, ix[0]).repeat(1, batch, h*w).view(cls_anchor_dim) 288 | # anchor_h = anchors.view(num_anchors, anchor_step).index_select(1, ix[1]).repeat(1, 
batch, h*w).view(cls_anchor_dim) 289 | 290 | # xs, ys = torch.sigmoid(output[0]) + grid_x, torch.sigmoid(output[1]) + grid_y 291 | # ws, hs = torch.exp(output[2]) * anchor_w.detach(), torch.exp(output[3]) * anchor_h.detach() 292 | # det_confs = torch.sigmoid(output[4]) 293 | 294 | # # by ysyun, dim=1 means input is 2D or even dimension else dim=0 295 | # cls_confs = torch.nn.Softmax(dim=1)(output[5:5+num_classes].transpose(0,1)).detach() 296 | # cls_max_confs, cls_max_ids = torch.max(cls_confs, 1) 297 | # cls_max_confs = cls_max_confs.view(-1) 298 | # cls_max_ids = cls_max_ids.view(-1) 299 | # t1 = time.time() 300 | 301 | # sz_hw = h*w 302 | # sz_hwa = sz_hw*num_anchors 303 | # det_confs = convert2cpu(det_confs) 304 | # cls_max_confs = convert2cpu(cls_max_confs) 305 | # cls_max_ids = convert2cpu_long(cls_max_ids) 306 | # xs, ys = convert2cpu(xs), convert2cpu(ys) 307 | # ws, hs = convert2cpu(ws), convert2cpu(hs) 308 | # if validation: 309 | # cls_confs = convert2cpu(cls_confs.view(-1, num_classes)) 310 | 311 | # t2 = time.time() 312 | # for b in range(batch): 313 | # boxes = [] 314 | # for cy in range(h): 315 | # for cx in range(w): 316 | # for i in range(num_anchors): 317 | # ind = b*sz_hwa + i*sz_hw + cy*w + cx 318 | # det_conf = det_confs[ind] 319 | # if only_objectness: 320 | # conf = det_confs[ind] 321 | # else: 322 | # conf = det_confs[ind] * cls_max_confs[ind] 323 | 324 | # if conf > conf_thresh: 325 | # bcx = xs[ind] 326 | # bcy = ys[ind] 327 | # bw = ws[ind] 328 | # bh = hs[ind] 329 | # cls_max_conf = cls_max_confs[ind] 330 | # cls_max_id = cls_max_ids[ind] 331 | # box = [bcx/w, bcy/h, bw/w, bh/h, det_conf, cls_max_conf, cls_max_id] 332 | # if (not only_objectness) and validation: 333 | # for c in range(num_classes): 334 | # tmp_conf = cls_confs[ind][c] 335 | # if c != cls_max_id and det_confs[ind]*tmp_conf > conf_thresh: 336 | # box.append(tmp_conf) 337 | # box.append(c) 338 | # boxes.append(box) 339 | # all_boxes.append(boxes) 340 | # t3 = time.time() 341 | # if False: 342 | # print('---------------------------------') 343 | # print('matrix computation : %f' % (t1-t0)) 344 | # print(' gpu to cpu : %f' % (t2-t1)) 345 | # print(' boxes filter : %f' % (t3-t2)) 346 | # print('---------------------------------') 347 | # return all_boxes 348 | 349 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 350 | import cv2 351 | colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) 352 | 353 | def get_color(c, x, max_val): 354 | ratio = float(x) / max_val * 5 355 | i = int(math.floor(ratio)) 356 | j = int(math.ceil(ratio)) 357 | ratio = ratio - i 358 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 359 | return int(r * 255) 360 | 361 | width = img.shape[1] 362 | height = img.shape[0] 363 | for i in range(len(boxes)): 364 | box = boxes[i] 365 | x1 = int(round((box[0] - box[2] / 2.0) * width)) 366 | y1 = int(round((box[1] - box[3] / 2.0) * height)) 367 | x2 = int(round((box[0] + box[2] / 2.0) * width)) 368 | y2 = int(round((box[1] + box[3] / 2.0) * height)) 369 | 370 | if color: 371 | rgb = color 372 | else: 373 | rgb = (255, 0, 0) 374 | if len(box) >= 7 and class_names: 375 | cls_conf = box[5] 376 | cls_id = box[6] 377 | # print('%s: %f' % (class_names[cls_id], cls_conf)) 378 | classes = len(class_names) 379 | offset = cls_id * 123457 % classes 380 | red = get_color(2, offset, classes) 381 | green = get_color(1, offset, classes) 382 | blue = get_color(0, offset, classes) 383 | if color is None: 384 | rgb = (red, green, 
blue) 385 | img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1) 386 | img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1) 387 | if savename: 388 | print("save plot results to %s" % savename) 389 | cv2.imwrite(savename, img) 390 | return img 391 | 392 | 393 | def plot_boxes(img, boxes, savename=None, class_names=None): 394 | colors = torch.FloatTensor([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]]) 395 | 396 | def get_color(c, x, max_val): 397 | ratio = float(x) / max_val * 5 398 | i = int(math.floor(ratio)) 399 | j = int(math.ceil(ratio)) 400 | ratio = ratio - i 401 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 402 | return int(r * 255) 403 | 404 | width = img.width 405 | height = img.height 406 | draw = ImageDraw.Draw(img) 407 | print("%d box(es) is(are) found" % len(boxes)) 408 | for i in range(len(boxes)): 409 | box = boxes[i] 410 | x1 = (box[0] - box[2] / 2.0) * width 411 | y1 = (box[1] - box[3] / 2.0) * height 412 | x2 = (box[0] + box[2] / 2.0) * width 413 | y2 = (box[1] + box[3] / 2.0) * height 414 | 415 | rgb = (255, 0, 0) 416 | if len(box) >= 7 and class_names: 417 | cls_conf = box[5] 418 | cls_id = box[6] 419 | print('%s: %f' % (class_names[cls_id], cls_conf)) 420 | classes = len(class_names) 421 | offset = cls_id * 123457 % classes 422 | red = get_color(2, offset, classes) 423 | green = get_color(1, offset, classes) 424 | blue = get_color(0, offset, classes) 425 | rgb = (red, green, blue) 426 | draw.text((x1, y1), class_names[cls_id], fill=rgb) 427 | draw.rectangle([x1, y1, x2, y2], outline=rgb) 428 | if savename: 429 | print("save plot results to %s" % savename) 430 | img.save(savename) 431 | return img 432 | 433 | 434 | def read_truths(lab_path): 435 | if not os.path.exists(lab_path): 436 | return np.array([]) 437 | if os.path.getsize(lab_path): 438 | truths = np.loadtxt(lab_path) 439 | truths = truths.reshape(truths.size // 5, 5) # to avoid single truth problem 440 | return truths 441 | else: 442 | return np.array([]) 443 | 444 | 445 | def read_truths_args(lab_path, min_box_scale): 446 | truths = read_truths(lab_path) 447 | new_truths = [] 448 | for i in range(truths.shape[0]): 449 | if truths[i][3] < min_box_scale: 450 | continue 451 | new_truths.append([truths[i][0], truths[i][1], truths[i][2], truths[i][3], truths[i][4]]) 452 | return np.array(new_truths) 453 | 454 | 455 | def load_class_names(namesfile): 456 | class_names = [] 457 | with open(namesfile, 'r', encoding='utf8') as fp: 458 | lines = fp.readlines() 459 | for line in lines: 460 | class_names.append(line.strip()) 461 | return class_names 462 | 463 | 464 | def image2torch(img): 465 | if isinstance(img, Image.Image): 466 | width = img.width 467 | height = img.height 468 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) 469 | img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous() 470 | img = img.view(1, 3, height, width) 471 | img = img.float().div(255.0) 472 | elif type(img) == np.ndarray: # cv2 image 473 | img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) 474 | else: 475 | print("unknown image type") 476 | exit(-1) 477 | return img 478 | 479 | 480 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=True): 481 | model.eval() 482 | t0 = time.time() 483 | img = image2torch(img) 484 | t1 = time.time() 485 | 486 | img = img.to(torch.device("cuda" if use_cuda else "cpu")) 487 | t2 = time.time() 488 | 489 | out_boxes = model(img) 490 | boxes = get_all_boxes(out_boxes, 
conf_thresh, model.num_classes, use_cuda=use_cuda)[0] 491 | 492 | t3 = time.time() 493 | boxes = nms(boxes, nms_thresh) 494 | t4 = time.time() 495 | 496 | if False: 497 | print('-----------------------------------') 498 | print(' image to tensor : %f' % (t1 - t0)) 499 | print(' tensor to cuda : %f' % (t2 - t1)) 500 | print(' predict : %f' % (t3 - t2)) 501 | print(' nms : %f' % (t4 - t3)) 502 | print(' total : %f' % (t4 - t0)) 503 | print('-----------------------------------') 504 | return boxes 505 | 506 | 507 | def read_data_cfg(datacfg): 508 | options = dict() 509 | options['gpus'] = '0,1,2,3' 510 | options['num_workers'] = '10' 511 | with open(datacfg, 'r') as fp: 512 | lines = fp.readlines() 513 | 514 | for line in lines: 515 | line = line.strip() 516 | if line == '': 517 | continue 518 | key, value = line.split('=') 519 | key = key.strip() 520 | value = value.strip() 521 | options[key] = value 522 | return options 523 | 524 | 525 | def scale_bboxes(bboxes, width, height): 526 | import copy 527 | dets = copy.deepcopy(bboxes) 528 | for i in range(len(dets)): 529 | dets[i][0] = dets[i][0] * width 530 | dets[i][1] = dets[i][1] * height 531 | dets[i][2] = dets[i][2] * width 532 | dets[i][3] = dets[i][3] * height 533 | return dets 534 | 535 | 536 | def file_lines(thefilepath): 537 | count = 0 538 | thefile = open(thefilepath, 'rb') 539 | while True: 540 | buffer = thefile.read(8192 * 1024) 541 | if not buffer: 542 | break 543 | count += buffer.count(b'\n') 544 | thefile.close() 545 | return count 546 | 547 | 548 | def get_image_size(fname): 549 | '''Determine the image type of fhandle and return its size. 550 | from draco''' 551 | with open(fname, 'rb') as fhandle: 552 | head = fhandle.read(24) 553 | if len(head) != 24: 554 | return 555 | if imghdr.what(fname) == 'png': 556 | check = struct.unpack('>i', head[4:8])[0] 557 | if check != 0x0d0a1a0a: 558 | return 559 | width, height = struct.unpack('>ii', head[16:24]) 560 | elif imghdr.what(fname) == 'gif': 561 | width, height = struct.unpack('H', fhandle.read(2))[0] - 2 574 | # We are at a SOFn block 575 | fhandle.seek(1, 1) # Skip `precision' byte. 
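                # Annotation (not in the original source): a JPEG SOFn segment stores, after its
                # 2-byte length, a 1-byte sample precision followed by height and width as
                # big-endian uint16 values — hence the seek(1, 1) above and the '>HH' unpack below.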
576 | height, width = struct.unpack('>HH', fhandle.read(4)) 577 | except Exception: # IGNORE:W0703 578 | return 579 | else: 580 | return 581 | return width, height 582 | 583 | 584 | def logging(message): 585 | print('%s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), message)) 586 | -------------------------------------------------------------------------------- /detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .YOLO3 import YOLOv3 2 | 3 | 4 | __all__ = ['build_detector'] 5 | 6 | 7 | def build_detector(cfg, use_cuda): 8 | return YOLOv3(cfg.YOLOV3.CFG, cfg.YOLOV3.WEIGHT, cfg.YOLOV3.CLASS_NAMES, 9 | score_thresh=cfg.YOLOV3.SCORE_THRESH, nms_thresh=cfg.YOLOV3.NMS_THRESH, 10 | is_xywh=True, use_cuda=use_cuda) 11 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | pillow 4 | torch 5 | torchvision 6 | opencv-python 7 | scikit-learn 8 | vizer 9 | pyyaml 10 | easydict 11 | matplotlib 12 | django>=2.0 13 | tqdm 14 | cos-python-sdk-v5 15 | cython 16 | ffmpy3 17 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/utils/__init__.py -------------------------------------------------------------------------------- /utils/dataset_reconstruct.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/1 4 | Desc: 数据集重构为按不同行人划分文件夹的格式 5 | """ 6 | import os 7 | import shutil 8 | import re 9 | import tqdm 10 | 11 | 12 | def reconstruct_market1501(source_path, generate_path): 13 | """ 14 | 重构MARKET数据集为不同的行人在不同的文件夹下(MARS数据集就是这种格式,无需重构) 15 | """ 16 | img_names = os.listdir(source_path) 17 | pattern = re.compile(r'([-\d]+)_c(\d)') 18 | for img_name in tqdm.tqdm(img_names): 19 | if '.jpg' not in img_name: 20 | continue 21 | # pid: 每个人的标签编号 1 22 | # _ : 摄像头号 2 23 | pid, _ = map(int, pattern.search(img_name).groups()) 24 | # 去掉没用的图片 25 | if pid == 0 or pid == -1: 26 | # 不处理的无用图片 27 | continue 28 | target_folder = os.path.join(generate_path, str(pid)) 29 | if not os.path.exists(target_folder): 30 | os.makedirs(target_folder) 31 | shutil.copy(os.path.join(source_path, img_name), os.path.join(target_folder, img_name)) 32 | 33 | 34 | if __name__ == '__main__': 35 | src_dir = r'data/Market-1501-v15.09.15/' 36 | target_dir = r'data/Market-generated/' 37 | reconstruct_market1501(src_dir, target_dir) -------------------------------------------------------------------------------- /utils/dataset_split.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import random 5 | import tqdm 6 | 7 | 8 | data_folder = '../dataset/MARS/' 9 | train_folder = '../dataset/MARS-generated/bbox_train/' 10 | test_folder = '../dataset/MARS-generated/bbox_test/' 11 | 12 | 13 | def check_folder(): 14 | if not os.path.exists(data_folder): 15 | os.mkdir(data_folder) 16 | if not os.path.exists(train_folder): 17 | os.mkdir(train_folder) 18 | if not os.path.exists(test_folder): 19 | os.mkdir(test_folder) 20 | 21 | 22 | def split_dataset(): 23 | """ 24 | 划分训练集和测试集 25 | :return: 26 | """ 27 | raw_data_folder = '../dataset/MARS/' 28 | categories = 
os.listdir(raw_data_folder) 29 | label_list = [] 30 | for category in tqdm.tqdm(categories): 31 | label = categories.index(category) 32 | label_list.append(label) 33 | category_folder = os.path.join(raw_data_folder, category) 34 | files = glob.glob(category_folder + '/*.jpg') 35 | random.shuffle(files) 36 | train_size = int(0.8 * len(files)) 37 | test_size = int(0.2 * len(files)) 38 | train_files = files[:train_size] 39 | test_files = files[train_size:] 40 | out_path = os.path.join(train_folder, str(label)) 41 | if not os.path.exists(out_path): 42 | os.mkdir(out_path) 43 | for img in train_files: 44 | shutil.copy(img, os.path.join(out_path, os.path.split(img)[-1])) 45 | out_path = os.path.join(test_folder, str(label)) 46 | if not os.path.exists(out_path): 47 | os.mkdir(out_path) 48 | for img in test_files: 49 | shutil.copy(img, os.path.join(out_path, os.path.split(img)[-1])) 50 | 51 | 52 | if __name__ == '__main__': 53 | check_folder() 54 | split_dataset() 55 | -------------------------------------------------------------------------------- /utils/draw_bbox.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 4 | 5 | 6 | def compute_color_for_labels(label): 7 | """ 8 | 标签颜色生成,尽量保证不同id对应的bbox框颜色不同 9 | """ 10 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 11 | return tuple(color) 12 | 13 | 14 | def draw_boxes(img, bbox, identities=None, offset=(0, 0)): 15 | """ 16 | 绘制bbox框在视频上 17 | Parameters 18 | ---------- 19 | img 20 | bbox 21 | identities 22 | offset 23 | 24 | Returns 25 | ------- 26 | 27 | """ 28 | for i, box in enumerate(bbox): 29 | x1, y1, x2, y2 = [int(i) for i in box] 30 | x1 += offset[0] 31 | x2 += offset[0] 32 | y1 += offset[1] 33 | y2 += offset[1] 34 | # box text and bar 35 | id = int(identities[i]) if identities is not None else 0 36 | color = compute_color_for_labels(id) 37 | label = '{}{:d}'.format("", id) 38 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] 39 | cv2.rectangle(img, (x1, y1), (x2, y2), color, 3) 40 | cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1) 41 | cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) 42 | return img 43 | 44 | -------------------------------------------------------------------------------- /utils/format_factory.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/4/2 4 | Desc: 进行视频格式和编码的转换,需要安装ffmpeg包并加入当前环境的环境变量 5 | """ 6 | from ffmpy3 import FFmpeg 7 | 8 | 9 | def avi2mp4(source_path: str, target_path:str): 10 | print("start transformation") 11 | ff = FFmpeg( 12 | inputs={source_path: '-f avi'}, 13 | outputs={target_path: '-f mp4 -y'} 14 | ) 15 | print(ff.cmd) 16 | ff.run() 17 | print("finish transformation") 18 | 19 | 20 | if __name__ == '__main__': 21 | # 测试脚本 22 | avi2mp4("../result/result.avi", "../result/result.mp4") -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | 6 | class YamlParser(edict): 7 | def __init__(self, cfg_dict=None, config_file=None): 8 | if cfg_dict is None: 9 | cfg_dict = {} 10 | 11 | if config_file is not None: 12 | assert (os.path.isfile(config_file)) 13 | with open(config_file, 'r') as fo: 14 | 
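                # Annotation (not in the original source): yaml.load() without an explicit Loader
                # raises a deprecation warning on PyYAML >= 5.1; yaml.safe_load(fo) or
                # yaml.load(fo, Loader=yaml.FullLoader) — as merge_from_file below already does —
                # is the safer call.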
cfg_dict.update(yaml.load(fo.read())) 15 | 16 | super(YamlParser, self).__init__(cfg_dict) 17 | 18 | def merge_from_file(self, config_file): 19 | with open(config_file, 'r', encoding="utf8") as fo: 20 | self.update(yaml.load(fo.read(), Loader=yaml.FullLoader)) 21 | 22 | def merge_from_dict(self, config_dict): 23 | self.update(config_dict) 24 | 25 | 26 | def parse_config(config_file=None): 27 | return YamlParser(config_file=config_file) 28 | 29 | 30 | if __name__ == "__main__": 31 | cfg = YamlParser(config_file="../configs/yolov3.yml") 32 | cfg.merge_from_file("../configs/deepsort.yml") 33 | print(cfg) 34 | -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- 1 | Demo网站 -------------------------------------------------------------------------------- /web/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/17 4 | Desc: desc 5 | """ -------------------------------------------------------------------------------- /web/db.sqlite3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/db.sqlite3 -------------------------------------------------------------------------------- /web/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Django's command-line utility for administrative tasks.""" 3 | import os 4 | import sys 5 | 6 | 7 | def main(): 8 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings') 9 | try: 10 | from django.core.management import execute_from_command_line 11 | except ImportError as exc: 12 | raise ImportError( 13 | "Couldn't import Django. Are you sure it's installed and " 14 | "available on your PYTHONPATH environment variable? Did you " 15 | "forget to activate a virtual environment?" 16 | ) from exc 17 | execute_from_command_line(sys.argv) 18 | 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /web/static/images/bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/static/images/bg.png -------------------------------------------------------------------------------- /web/templates/show_images.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 显示图片 8 | 9 | 10 | 11 | 12 |     13 | 14 |
15 |

目标跟踪结果

16 | {% for image in images %} 17 | 18 | {% endfor %} 19 |
20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /web/templates/show_video.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {#依赖jQueryVideoJS实现#} 6 | 7 | 显示跟踪视频 8 | 9 | 10 | 11 | 12 | 13 | 14 |     15 | 16 |
17 |

目标跟踪结果

18 | {% load static %} 19 |
20 | 24 |
25 |
26 | 27 | 28 | 29 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /web/templates/upload.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 多目标跟踪演示 8 | 9 | 10 | 11 | 12 | 13 | 14 |
15 |

多目标跟踪演示

16 |
17 | {% csrf_token %} 18 |
19 | 20 |
21 | 22 |
23 |
24 | 25 |
26 |
27 |
28 | 29 |
30 |
31 | 32 |
33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /web/web/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luanshiyinyang/DeepSORT/7844de280a7db5b6f8a5e23c6c37ff093ac4d307/web/web/__init__.py -------------------------------------------------------------------------------- /web/web/asgi.py: -------------------------------------------------------------------------------- 1 | """ 2 | ASGI config for web project. 3 | 4 | It exposes the ASGI callable as a module-level variable named ``application``. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/howto/deployment/asgi/ 8 | """ 9 | 10 | import os 11 | 12 | from django.core.asgi import get_asgi_application 13 | 14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings') 15 | 16 | application = get_asgi_application() 17 | -------------------------------------------------------------------------------- /web/web/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Django settings for web project. 3 | 4 | Generated by 'django-admin startproject' using Django 3.0.4. 5 | 6 | For more information on this file, see 7 | https://docs.djangoproject.com/en/3.0/topics/settings/ 8 | 9 | For the full list of settings and their values, see 10 | https://docs.djangoproject.com/en/3.0/ref/settings/ 11 | """ 12 | 13 | import os 14 | 15 | # Build paths inside the project like this: os.path.join(BASE_DIR, ...) 16 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | 18 | 19 | # Quick-start development settings - unsuitable for production 20 | # See https://docs.djangoproject.com/en/3.0/howto/deployment/checklist/ 21 | 22 | # SECURITY WARNING: keep the secret key used in production secret! 23 | SECRET_KEY = '3srrhz1#(#ebb%&0+_$mkpob2(^+&=19@7moir-jm3w3ma%#pm' 24 | 25 | # SECURITY WARNING: don't run with debug turned on in production! 
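# Annotation (not in the original source): DEBUG = True, ALLOWED_HOSTS = ['*'] and the
# hard-coded SECRET_KEY above are only acceptable for this local demo; a real deployment
# should load the secret from the environment, disable DEBUG and restrict the allowed hosts.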
26 | DEBUG = True 27 | 28 | ALLOWED_HOSTS = ['*'] 29 | 30 | 31 | # Application definition 32 | 33 | INSTALLED_APPS = [ 34 | 'django.contrib.admin', 35 | 'django.contrib.auth', 36 | 'django.contrib.contenttypes', 37 | 'django.contrib.sessions', 38 | 'django.contrib.messages', 39 | 'django.contrib.staticfiles', 40 | 'web' 41 | 42 | ] 43 | 44 | MIDDLEWARE = [ 45 | 'django.middleware.security.SecurityMiddleware', 46 | 'django.contrib.sessions.middleware.SessionMiddleware', 47 | 'django.middleware.common.CommonMiddleware', 48 | 'django.middleware.csrf.CsrfViewMiddleware', 49 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 50 | 'django.contrib.messages.middleware.MessageMiddleware', 51 | 'django.middleware.clickjacking.XFrameOptionsMiddleware', 52 | ] 53 | 54 | ROOT_URLCONF = 'web.urls' 55 | 56 | TEMPLATES = [ 57 | { 58 | 'BACKEND': 'django.template.backends.django.DjangoTemplates', 59 | 'DIRS': [os.path.join(BASE_DIR, 'templates')], 60 | 'APP_DIRS': True, 61 | 'OPTIONS': { 62 | 'context_processors': [ 63 | 'django.template.context_processors.debug', 64 | 'django.template.context_processors.request', 65 | 'django.contrib.auth.context_processors.auth', 66 | 'django.contrib.messages.context_processors.messages', 67 | 'django.template.context_processors.media', 68 | ], 69 | }, 70 | }, 71 | ] 72 | 73 | WSGI_APPLICATION = 'web.wsgi.application' 74 | 75 | 76 | # Database 77 | # https://docs.djangoproject.com/en/3.0/ref/settings/#databases 78 | 79 | DATABASES = { 80 | 'default': { 81 | 'ENGINE': 'django.db.backends.sqlite3', 82 | 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'), 83 | } 84 | } 85 | 86 | 87 | # Password validation 88 | # https://docs.djangoproject.com/en/3.0/ref/settings/#auth-password-validators 89 | 90 | AUTH_PASSWORD_VALIDATORS = [ 91 | { 92 | 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', 93 | }, 94 | { 95 | 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', 96 | }, 97 | { 98 | 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', 99 | }, 100 | { 101 | 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', 102 | }, 103 | ] 104 | 105 | 106 | # Internationalization 107 | # https://docs.djangoproject.com/en/3.0/topics/i18n/ 108 | 109 | LANGUAGE_CODE = 'zh-hans' 110 | 111 | TIME_ZONE = 'Asia/Shanghai' 112 | 113 | USE_I18N = True 114 | 115 | USE_L10N = True 116 | 117 | USE_TZ = True 118 | 119 | 120 | # Static files (CSS, JavaScript, Images) 121 | # https://docs.djangoproject.com/en/3.0/howto/static-files/ 122 | 123 | STATIC_URL = '/static/' 124 | STATIC_ROOT = os.path.join(BASE_DIR, 'static') 125 | # 设置图片等静态文件的路径 126 | STATICFILES_DIRS = ( 127 | ('images', os.path.join(STATIC_ROOT, 'images').replace('\\', '/')), 128 | ('upload', os.path.join(STATIC_ROOT, 'upload').replace('\\', '/')), 129 | ('videos', os.path.join(STATIC_ROOT, 'videos').replace('\\', '/')), 130 | ('css', os.path.join(STATIC_ROOT, 'css').replace('\\', '/')), 131 | ('js', os.path.join(STATIC_ROOT, 'js').replace('\\', '/')), 132 | ) 133 | 134 | MEDIA_URL = '/media/' 135 | MEDIA_ROOT = os.path.join(BASE_DIR, 'media') 136 | -------------------------------------------------------------------------------- /web/web/urls.py: -------------------------------------------------------------------------------- 1 | from django.contrib import admin 2 | from django.contrib.staticfiles.urls import staticfiles_urlpatterns 3 | from django.urls import path 4 | from . import views 5 | from . 
import settings 6 | from django.conf.urls.static import static 7 | from django.conf.urls import url 8 | 9 | urlpatterns = [ 10 | path('admin/', admin.site.urls), 11 | path('', views.upload), 12 | url('video/', views.stream_video, name="video") 13 | ] 14 | 15 | urlpatterns += staticfiles_urlpatterns() 16 | urlpatterns += static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) 17 | -------------------------------------------------------------------------------- /web/web/views.py: -------------------------------------------------------------------------------- 1 | """ 2 | Author: Zhou Chen 3 | Date: 2020/3/17 4 | Desc: desc 5 | """ 6 | from django.shortcuts import render 7 | import re 8 | import mimetypes 9 | from wsgiref.util import FileWrapper 10 | from django.http import StreamingHttpResponse 11 | import os 12 | import uuid 13 | from .settings import MEDIA_ROOT, STATIC_ROOT 14 | import sys 15 | sys.path.append("../") 16 | import yolo3_deepsort 17 | import utils.format_factory as ff 18 | 19 | 20 | def upload(request): 21 | if request.method == 'POST': 22 | files = request.FILES['video'] 23 | if len(files) > 0: 24 | if not os.path.exists(MEDIA_ROOT): 25 | # 若不存在媒体存储目录 26 | os.mkdir(MEDIA_ROOT) 27 | video = files 28 | extension = os.path.splitext(video.name)[1] 29 | # 重命名文件 30 | file_name = '{}{}'.format(uuid.uuid4(), extension) 31 | file_path = '{}/{}'.format(MEDIA_ROOT, file_name) 32 | # 保存文件到本机 33 | with open(file_path, 'wb') as f: 34 | for c in video.chunks(): 35 | f.write(c) 36 | # 视频保存本机之后调用模型 37 | 38 | args = yolo3_deepsort.Argument(file_path) 39 | args.output_path = os.path.join(STATIC_ROOT, 'videos', 'rst.avi') 40 | cfg = yolo3_deepsort.get_config() 41 | cfg.merge_from_file(args.config_detection) 42 | cfg.merge_from_file(args.config_deepsort) 43 | with yolo3_deepsort.VideoTracker(cfg, args, file_path) as vdo_trk: 44 | vdo_trk.run_with_limit(300) 45 | os.remove(os.path.join(STATIC_ROOT, 'videos', 'rst.mp4')) 46 | ff.avi2mp4(args.output_path, os.path.join(STATIC_ROOT, 'videos', 'rst.mp4')) 47 | 48 | return render(request, 'show_video.html', {'filename': 'rst.mp4'}) 49 | else: 50 | return render(request, 'upload.html') 51 | return render(request, 'upload.html') 52 | 53 | 54 | def file_iterator(file_name, chunk_size=8192, offset=0, length=None): 55 | with open(file_name, "rb") as f: 56 | f.seek(offset, os.SEEK_SET) 57 | remaining = length 58 | while True: 59 | bytes_length = chunk_size if remaining is None else min(remaining, chunk_size) 60 | data = f.read(bytes_length) 61 | if not data: 62 | break 63 | if remaining: 64 | remaining -= len(data) 65 | yield data 66 | 67 | 68 | def stream_video(request): 69 | path = request.GET.get('path') 70 | path = os.path.join("static", "videos", path) 71 | range_header = request.META.get('HTTP_RANGE', '').strip() 72 | range_re = re.compile(r'bytes\s*=\s*(\d+)\s*-\s*(\d*)', re.I) 73 | range_match = range_re.match(range_header) 74 | size = os.path.getsize(path) 75 | content_type, encoding = mimetypes.guess_type(path) 76 | content_type = content_type or 'application/octet-stream' 77 | if range_match: 78 | first_byte, last_byte = range_match.groups() 79 | first_byte = int(first_byte) if first_byte else 0 80 | last_byte = first_byte + 1024 * 1024 * 8 # 8M 每片,响应体最大体积 81 | if last_byte >= size: 82 | last_byte = size - 1 83 | length = last_byte - first_byte + 1 84 | resp = StreamingHttpResponse(file_iterator(path, offset=first_byte, length=length), status=206, 85 | content_type=content_type) 86 | resp['Content-Length'] = str(length) 87 | 
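        # Annotation (not in the original source): the Content-Range header set below tells the
        # client which byte window this 206 (Partial Content) response covers, which is what
        # allows the HTML5 video player to seek within the streamed file.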
88 |     else:
89 |         # no Range header: return the whole file through a generator to save memory
90 |         resp = StreamingHttpResponse(FileWrapper(open(path, 'rb')), content_type=content_type)
91 |         resp['Content-Length'] = str(size)
92 |     resp['Accept-Ranges'] = 'bytes'
93 |     return resp
--------------------------------------------------------------------------------
/web/web/wsgi.py:
--------------------------------------------------------------------------------
1 | """
2 | WSGI config for web project.
3 | 
4 | It exposes the WSGI callable as a module-level variable named ``application``.
5 | 
6 | For more information on this file, see
7 | https://docs.djangoproject.com/en/3.0/howto/deployment/wsgi/
8 | """
9 | 
10 | import os
11 | 
12 | from django.core.wsgi import get_wsgi_application
13 | 
14 | os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'web.settings')
15 | 
16 | application = get_wsgi_application()
17 | 
--------------------------------------------------------------------------------
/yolo3_deepsort.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import time
4 | import argparse
5 | import torch
6 | 
7 | from detector import build_detector
8 | from deepsort import build_tracker
9 | from utils.draw_bbox import draw_boxes
10 | from utils.parse_config import parse_config
11 | 
12 | current_path = os.path.dirname(__file__)
13 | 
14 | 
15 | class VideoTracker(object):
16 |     def __init__(self, config, arguments, video_path=None):
17 |         self.cfg = config
18 |         self.args = arguments
19 |         self.video_fps = 60  # default output video FPS is 60
20 |         if video_path is not None:
21 |             self.args.video_path = video_path
22 |         is_use_cuda = self.args.use_cuda and torch.cuda.is_available()
23 |         if not is_use_cuda:
24 |             print("Running programme on CPU")
25 |         else:
26 |             print("Running programme on GPU")
27 | 
28 |         if self.args.display:
29 |             # create the visualization window
30 |             cv2.namedWindow("test", cv2.WINDOW_NORMAL)
31 |             cv2.resizeWindow("test", self.args.show_width, self.args.show_height)
32 | 
33 |         self.vdo = cv2.VideoCapture()
34 |         self.detector = build_detector(self.cfg, use_cuda=is_use_cuda)
35 |         self.deepsort = build_tracker(self.cfg, use_cuda=is_use_cuda)
36 | 
37 |     def __enter__(self):
38 |         self.vdo.open(self.args.video_path)
39 |         self.video_fps = self.vdo.get(cv2.CAP_PROP_FPS)
40 |         print("input video fps", self.video_fps)
41 |         self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH))
42 |         self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT))
43 |         if self.args.output_path:
44 |             os.makedirs(os.path.dirname(self.args.output_path) or ".", exist_ok=True)  # create the output folder; the writer below keeps the FPS of the source video
45 |             writer_encoder = cv2.VideoWriter_fourcc(*"XVID") if self.args.output_type == "avi" else cv2.VideoWriter_fourcc(*"X264")
46 |             self.writer = cv2.VideoWriter(self.args.output_path, writer_encoder, self.video_fps, (self.im_width, self.im_height))
47 |         assert self.vdo.isOpened()
48 |         return self
49 | 
50 |     def __exit__(self, exc_type, exc_value, exc_traceback):
51 |         if exc_type:
52 |             print(exc_type, exc_value, exc_traceback)
53 |         self.vdo.release()
54 |         if hasattr(self, "writer"): self.writer.release()  # the writer only exists when output_path is set
55 |         cv2.destroyAllWindows()
56 | 
57 |     def run(self):
58 |         idx_frame = 0  # frame index
59 |         fps_list = []
60 |         while self.vdo.grab():
61 |             # grab frames in a loop
62 |             idx_frame += 1
63 |             start = time.time()
64 |             _, ori_im = self.vdo.retrieve()  # decode and return one frame
65 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
66 | 
67 |             # object detection
68 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
69 |             if bbox_xywh is not None:
70 |                 # keep only detections with class id 0 (person); see coco.names in the YOLO cfg directory
71 |                 mask = (cls_ids == 0)
72 |                 bbox_xywh = bbox_xywh[mask]
73 |                 cls_confidence = cls_confidence[mask]
74 |                 bbox_xywh[:, 2:] *= 1.2  # scale up box width and height to avoid boxes that are too small
75 |                 # tracking
76 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
77 | 
78 |                 # draw the tracking result boxes
79 |                 if len(outputs) > 0:
80 |                     bbox_xyxy = outputs[:, :4]
81 |                     identities = outputs[:, -1]
82 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
83 | 
84 |             end = time.time()
85 |             fps = 1 / (end - start)
86 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, fps))
87 |             fps_list.append(fps)
88 | 
89 |             if self.args.display:
90 |                 cv2.imshow("test", ori_im)
91 |                 cv2.waitKey(1)
92 |             if idx_frame % self.args.frame_interval == 0:
93 |                 # write frames at the configured interval, not every frame
94 |                 if self.args.output_path:
95 |                     self.writer.write(ori_im)
96 | 
97 |         print("average fps: {:.03f}".format(sum(fps_list) / max(idx_frame, 1)))
98 | 
99 |     def run_with_limit(self, frame_limit=20):
100 |         idx_frame = 0
101 |         while self.vdo.grab() and idx_frame < frame_limit * self.args.frame_interval:
102 |             idx_frame += 1
103 |             start = time.time()
104 |             _, ori_im = self.vdo.retrieve()
105 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
106 | 
107 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
108 |             if bbox_xywh is not None:
109 |                 mask = (cls_ids == 0)
110 | 
111 |                 bbox_xywh = bbox_xywh[mask]
112 |                 bbox_xywh[:, 2:] *= 1.2
113 |                 cls_confidence = cls_confidence[mask]
114 | 
115 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
116 | 
117 |                 if len(outputs) > 0:
118 |                     bbox_xyxy = outputs[:, :4]
119 |                     identities = outputs[:, -1]
120 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
121 | 
122 |             end = time.time()
123 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, 1 / (end - start)))
124 | 
125 |             if self.args.display:
126 |                 cv2.imshow("test", ori_im)
127 |                 cv2.waitKey(1)
128 |             if idx_frame % self.args.frame_interval == 0:
129 |                 if self.args.output_path:
130 |                     self.writer.write(ori_im)
131 |                 # file_path = os.path.join(save_path, '{}.png'.format(idx_frame))
132 |                 # result_path.append(os.path.split(file_path)[-1])  # return only the file name, not the full path, to match Django's static file settings
133 |                 # cv2.imwrite(file_path, ori_im)
134 |         return None
135 | 
136 | 
137 | def parse_arguments():
138 |     """
139 |     Parse command-line arguments.
140 |     :return:
141 |     """
142 |     parser = argparse.ArgumentParser()
143 |     parser.add_argument("--video_path", type=str, default='./data/TownCentreXVID.avi')  # source video to track
144 |     parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yml")  # YOLOv3 detector config file
145 |     parser.add_argument("--config_deepsort", type=str, default="./configs/deepsort.yml")  # DeepSORT tracker config file
146 |     parser.add_argument("--frame_interval", type=int, default=1)  # output video frame interval
147 |     parser.add_argument("--show_window", dest="display", action="store_true", default=False)  # show the video in an OpenCV window
148 |     parser.add_argument("--show_width", type=int, default=800)  # display window width
149 |     parser.add_argument("--show_height", type=int, default=600)  # display window height
150 |     parser.add_argument("--output_path", type=str, default="./results/result.avi")  # output video path
151 |     parser.add_argument("--use_cuda", action="store_true", default=True)  # use GPU if available
152 |     parser.add_argument("--output_type", type=str, default="avi")
153 |     return parser.parse_args()
154 | 
155 | 
156 | class Argument(object):
157 |     def __init__(self, video_path):
158 |         """
159 |         Arguments for module-level calls, used instead of the command-line parser above so the tracker can be driven from other code (e.g. the web app).
160 |         :param video_path:
161 |         """
162 |         self.video_path = video_path  # input video path
163 |         self.config_detection = os.path.join(current_path, 'configs/yolov3.yml')  # detector config file
164 |         self.config_deepsort = os.path.join(current_path, 'configs/deepsort.yml')  # DeepSORT config file
165 |         self.display = False  # API mode does not show an OpenCV window by default
166 |         self.frame_interval = 1  # defaults to 1; if the output FPS equals the input FPS the output video then has the same duration as the input
167 |         self.show_width = 800  # display window width
168 |         self.show_height = 600  # display window height
169 |         self.output_path = os.path.join(current_path, 'result/result.avi')  # output video file path
170 |         self.output_type = "avi"
171 |         self.use_cuda = True  # use GPU if available
172 | 
173 | 
174 | def get_config():
175 |     return parse_config()
176 | 
177 | 
178 | if __name__ == "__main__":
179 |     args = parse_arguments()
180 |     cfg = parse_config()
181 |     cfg.merge_from_file(args.config_detection)  # load the detector config
182 |     cfg.merge_from_file(args.config_deepsort)  # load the DeepSORT config
183 | 
184 |     with VideoTracker(cfg, args) as vdo_trk:
185 |         vdo_trk.run()
186 | 
--------------------------------------------------------------------------------
/yolo3_deepsort_camera.py:
--------------------------------------------------------------------------------
1 | """
2 | Author: Zhou Chen
3 | Date: 2020/6/4
4 | Desc: module for real-time tracking from the camera
5 | """
6 | import os
7 | import cv2
8 | import time
9 | import argparse
10 | import torch
11 | 
12 | from detector import build_detector
13 | from deepsort import build_tracker
14 | from utils.draw_bbox import draw_boxes
15 | from utils.parse_config import parse_config
16 | 
17 | current_path = os.path.dirname(__file__)
18 | 
19 | 
20 | class VideoTracker(object):
21 |     def __init__(self, config, arguments, video_path=None):
22 |         self.cfg = config
23 |         self.args = arguments
24 |         self.video_fps = 60  # default output video FPS is 60
25 |         if video_path is not None:
26 |             self.args.video_path = video_path
27 |         is_use_cuda = self.args.use_cuda and torch.cuda.is_available()
28 |         if not is_use_cuda:
29 |             print("Running programme on CPU")
30 |         else:
31 |             print("Running programme on GPU")
32 | 
33 |         if self.args.display:
34 |             # create the visualization window
35 |             cv2.namedWindow("test", cv2.WINDOW_NORMAL)
36 |             cv2.resizeWindow("test", self.args.show_width, self.args.show_height)
37 | 
38 |         self.camera = cv2.VideoCapture(getattr(self.args, "camera_id", 0))  # open the camera selected by --camera_id (defaults to device 0)
39 |         self.video_width, self.video_height = self.args.show_width, self.args.show_height
40 |         self.detector = build_detector(self.cfg, use_cuda=is_use_cuda)
41 |         self.deepsort = build_tracker(self.cfg, use_cuda=is_use_cuda)
42 | 
43 |     def __enter__(self):
44 |         self.video_fps = self.camera.get(cv2.CAP_PROP_FPS) or self.video_fps  # some webcams report 0 FPS; fall back to the default
45 |         print("camera capture fps:", self.video_fps)
46 |         if self.args.output_path:
47 |             os.makedirs(os.path.dirname(self.args.output_path) or ".", exist_ok=True)  # create the output folder; the writer below keeps the capture FPS
48 |             writer_encoder = cv2.VideoWriter_fourcc(*"XVID")
49 |             self.writer = cv2.VideoWriter(self.args.output_path, writer_encoder, self.video_fps, (self.video_width, self.video_height))
50 |         assert self.camera.isOpened()
51 |         return self
52 | 
53 |     def __exit__(self, exc_type, exc_value, exc_traceback):
54 |         if exc_type:
55 |             print(exc_type, exc_value, exc_traceback)
56 |         self.camera.release()
57 |         if hasattr(self, "writer"): self.writer.release()  # the writer only exists when output_path is set
58 |         cv2.destroyAllWindows()
59 | 
60 |     def run(self):
61 |         idx_frame = 0  # frame index
62 |         fps_list = []
63 |         while self.camera.isOpened():
64 |             # grab frames in a loop
65 |             idx_frame += 1
66 |             start = time.time()
67 |             _, ori_im = self.camera.read()  # decode and return one frame
68 |             im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB)
69 |             # object detection
70 |             bbox_xywh, cls_confidence, cls_ids = self.detector(im)
71 |             if bbox_xywh is not None:
72 |                 # keep only detections with class id 0 (person); see coco.names in the YOLO cfg directory
73 |                 mask = (cls_ids == 0)
74 |                 bbox_xywh = bbox_xywh[mask]
75 |                 cls_confidence = cls_confidence[mask]
76 |                 bbox_xywh[:, 2:] *= 1.2  # scale up box width and height to avoid boxes that are too small
77 |                 # tracking
78 |                 outputs = self.deepsort.update(bbox_xywh, cls_confidence, im)
79 | 
80 |                 # draw the tracking result boxes
81 |                 if len(outputs) > 0:
82 |                     bbox_xyxy = outputs[:, :4]
83 |                     identities = outputs[:, -1]
84 |                     ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
85 | 
86 |             end = time.time()
87 |             fps = 1 / (end - start)
88 |             print("frame index: {}, spend time: {:.03f}s, fps: {:.03f}".format(idx_frame, end - start, fps))
89 |             fps_list.append(fps)
90 | 
91 |             if self.args.display:
92 |                 cv2.imshow("test", ori_im)
93 |                 cv2.waitKey(1)
94 |             if idx_frame % self.args.frame_interval == 0:
95 |                 # write frames at the configured interval, not every frame
96 |                 if self.args.output_path:
97 |                     self.writer.write(ori_im)
98 | 
99 |         print("average fps: {:.03f}".format(sum(fps_list) / max(idx_frame, 1)))
100 | 
101 | 
102 | def parse_arguments():
103 |     """
104 |     Parse command-line arguments.
105 |     :return:
106 |     """
107 |     parser = argparse.ArgumentParser()
108 |     parser.add_argument("--camera_id", type=int, default=0)  # camera device id
109 |     parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yml")
110 |     parser.add_argument("--config_deepsort", type=str, default="./configs/deepsort.yml")
111 |     parser.add_argument("--frame_interval", type=int, default=1)  # output video frame interval
112 |     parser.add_argument("--show_window", dest="display", action="store_true", default=True)  # show the video in an OpenCV window (on by default for the camera demo)
113 |     parser.add_argument("--show_width", type=int, default=800)  # display window width
114 |     parser.add_argument("--show_height", type=int, default=600)  # display window height
115 |     parser.add_argument("--output_path", type=str, default="./results/result.avi")  # output video path
116 |     parser.add_argument("--use_cuda", action="store_true", default=True)  # use GPU if available
117 |     return parser.parse_args()
118 | 
119 | 
120 | if __name__ == "__main__":
121 |     args = parse_arguments()
122 |     cfg = parse_config()
123 |     cfg.merge_from_file(args.config_detection)  # load the detector config
124 |     cfg.merge_from_file(args.config_deepsort)  # load the DeepSORT config
125 | 
126 |     with VideoTracker(cfg, args) as vdo_trk:
127 |         vdo_trk.run()
128 | 
129 | 
--------------------------------------------------------------------------------
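Usage note: the snippet below is a minimal sketch of driving the tracker from Python through the Argument class, mirroring what web/web/views.py does; the input path is a placeholder you would replace with your own video file.

import yolo3_deepsort

video_path = "./data/TownCentreXVID.avi"    # placeholder: any local video file
args = yolo3_deepsort.Argument(video_path)  # module-level arguments instead of argparse
cfg = yolo3_deepsort.get_config()
cfg.merge_from_file(args.config_detection)  # YOLOv3 settings from configs/yolov3.yml
cfg.merge_from_file(args.config_deepsort)   # DeepSORT settings from configs/deepsort.yml
with yolo3_deepsort.VideoTracker(cfg, args, video_path) as vdo_trk:
    vdo_trk.run_with_limit(300)             # process at most 300 frames, as the web view does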