├── .idea ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── object-detection-master.iml ├── other.xml ├── vcs.xml └── workspace.xml ├── README.md ├── __pycache__ └── detector.cpython-37.pyc ├── app.py ├── cam ├── __pycache__ │ ├── base_camera.cpython-37.pyc │ └── base_camera.cpython-38.pyc ├── base_camera.py ├── camera.py └── coco.names ├── deepsort ├── .gitignore ├── .idea │ ├── deep_sort_pytorch-master.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── LICENSE ├── __init__.py ├── configs │ ├── deep_sort.yaml │ ├── yolov3.yaml │ └── yolov3_tiny.yaml ├── deep_sort │ ├── __init__.py │ ├── deep │ │ ├── __init__.py │ │ ├── checkpoint │ │ │ └── .gitkeep │ │ ├── evaluate.py │ │ ├── feature_extractor.py │ │ ├── model.py │ │ ├── original_model.py │ │ ├── test.py │ │ ├── train.jpg │ │ └── train.py │ ├── deep_sort.py │ └── sort │ │ ├── __init__.py │ │ ├── detection.py │ │ ├── iou_matching.py │ │ ├── kalman_filter.py │ │ ├── linear_assignment.py │ │ ├── nn_matching.py │ │ ├── preprocessing.py │ │ ├── track.py │ │ └── tracker.py ├── detector │ ├── YOLOv3 │ │ ├── README.md │ │ ├── __init__.py │ │ ├── cfg.py │ │ ├── cfg │ │ │ ├── coco.data │ │ │ ├── coco.names │ │ │ ├── darknet19_448.cfg │ │ │ ├── tiny-yolo-voc.cfg │ │ │ ├── tiny-yolo.cfg │ │ │ ├── voc.data │ │ │ ├── voc.names │ │ │ ├── voc_gaotie.data │ │ │ ├── yolo-voc.cfg │ │ │ ├── yolo.cfg │ │ │ ├── yolo_v3.cfg │ │ │ └── yolov3-tiny.cfg │ │ ├── darknet.py │ │ ├── demo │ │ │ ├── 004545.jpg │ │ │ └── results │ │ │ │ └── 004545.jpg │ │ ├── detect.py │ │ ├── nms │ │ │ ├── __init__.py │ │ │ ├── build.sh │ │ │ ├── ext │ │ │ │ ├── __init__.py │ │ │ │ ├── build.py │ │ │ │ ├── cpu │ │ │ │ │ ├── nms_cpu.cpp │ │ │ │ │ └── vision.h │ │ │ │ ├── cuda │ │ │ │ │ ├── nms.cu │ │ │ │ │ └── vision.h │ │ │ │ ├── nms.h │ │ │ │ ├── torch_extension.cp37-win_amd64.pyd │ │ │ │ └── vision.cpp │ │ │ ├── nms.py │ │ │ └── python_nms.py │ │ ├── region_layer.py │ │ ├── weight │ │ │ └── .gitkeep │ │ ├── yolo_layer.py │ │ └── yolo_utils.py │ └── __init__.py ├── ped_det_server.py ├── scripts │ ├── yolov3_deepsort.sh │ └── yolov3_tiny_deepsort.sh └── utils │ ├── __init__.py │ ├── asserts.py │ ├── draw.py │ ├── evaluation.py │ ├── io.py │ ├── json_logger.py │ ├── log.py │ ├── parser.py │ └── tools.py ├── demo ├── 1.png ├── 2.png ├── 3.png ├── dog.jpg ├── mobile.gif └── output.gif ├── detector.py ├── requirements.txt ├── result.txt ├── static ├── ajax-loader.gif ├── client.js ├── logo.png ├── style.css ├── style1.css └── worker.js ├── templates ├── index.html ├── index1.html └── real-time.html └── text.py /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- 
/.idea/object-detection-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 | 25 | 26 | -------------------------------------------------------------------------------- /.idea/other.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3, DeepSort and Flask Running on a Web Page 2 | 3 | ## Desktop 4 | 5 | ![](demo/output.gif) 6 | 7 | ## Mobile 8 | 9 | ![](demo/mobile.gif) 10 | 11 | ## Introduction 12 | This project combines YOLOv3, DeepSort and Flask into an object-detection and multi-object-tracking platform that runs in a web page. You can upload images or videos: an uploaded image is run through object detection, while an uploaded video is run through multi-object tracking (pedestrians by default; other object classes can be configured). The mobile page provides an in-browser camera for real-time detection and tracking. 13 | ## Dependencies 14 | - torch 15 | - torchvision 16 | - numpy 17 | - opencv-python==4.1.2.30 18 | - lxml 19 | - tqdm 20 | - flask 21 | - seaborn 22 | - pillow 23 | - vizer 24 | - numba 25 | 26 | ## Quick Start 27 | ##### 1. Install all dependencies 28 | ```bash 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | ##### 2. Download the YOLOv3 weights 33 | ``` 34 | cd deepsort/detector/YOLOv3/weight/ 35 | wget https://pjreddie.com/media/files/yolov3.weights 36 | cd ../../../ 37 | ``` 38 | 39 | ##### 3. Download the DeepSort checkpoint ckpt.t7 40 | ``` 41 | cd deep_sort/deep/checkpoint 42 | # download ckpt.t7 from 43 | https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder 44 | cd ../../../ 45 | ``` 46 | 47 | ##### 4. Alternatively, download both weight files from Baidu NetDisk 48 | [Extraction code: hhbb](https://pan.baidu.com/s/1blu8U3wM4NN2TpDK3U5leA) 49 | 50 | ``` 51 | put yolov3.weights in deepsort/detector/YOLOv3/weight/ 52 | put ckpt.t7 in deep_sort/deep/checkpoint 53 | ``` 54 | 55 | ##### 5. Compile the nms module 56 | ```bash 57 | cd detector/YOLOv3/nms 58 | sh build.sh 59 | cd ../../../ 60 | ``` 61 | or 62 | ```bash 63 | cd detector/YOLOv3/nms/ext 64 | python build.py build_ext develop 65 | cd ../../../../ 66 | ``` 67 | ##### 6. Run 68 | ``` 69 | python app.py 70 | ``` 71 | 72 | ##### 7. To deploy on a server, see my blog post [阿里云ECS部署python,flask项目,简单易懂,无需nginx和uwsgi](https://blog.csdn.net/qq_44523137/article/details/112676287) (deploying a Python/Flask project on Alibaba Cloud ECS without nginx or uwsgi) 73 | 74 | ##### 8.
You can use yolov3 demo 75 | ``` 76 | python detector.py 77 | ``` 78 | 79 | ##Result 80 | ![](demo/1.png) 81 | 82 | ![](demo/2.png) 83 | 84 | ![](demo/3.png) 85 | ## References 86 | 87 | - code: [ZQPei/deep_sort_pytorch](https://github.com/ZQPei/deep_sort_pytorch) 88 | -------------------------------------------------------------------------------- /__pycache__/detector.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/__pycache__/detector.cpython-37.pyc -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | from flask import Flask, request, Response,render_template 4 | import json 5 | from cam.base_camera import BaseCamera 6 | from deepsort.detector import build_detector 7 | from deepsort.deep_sort import build_tracker 8 | from deepsort.utils.draw import draw_boxes 9 | from deepsort.detector.YOLOv3 import YOLOv3 10 | 11 | yolo = YOLOv3(r"deepsort/detector/YOLOv3/cfg/yolo_v3.cfg", r"deepsort/detector/YOLOv3/weight/yolov3.weights",r"cam/coco.names") 12 | detector = build_detector(use_cuda=False) 13 | deepsort = build_tracker(use_cuda=False) 14 | # Initialize Flask application 15 | import os 16 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 17 | app = Flask(__name__) 18 | 19 | class_names = [c.strip() for c in open(r'cam/coco.names').readlines()] 20 | 21 | 22 | file_name = ['jpg','jpeg','png'] 23 | video_name = ['mp4','avi'] 24 | 25 | # API that returns image with detections on it 26 | @app.route('/images', methods= ['POST']) 27 | def get_image(): 28 | image = request.files["images"] 29 | image_name = image.filename 30 | 31 | with open('./result.txt', 'r') as f: 32 | im_na = f.read() 33 | try: 34 | os.remove(im_na) 35 | except: 36 | pass 37 | 38 | if image_name.split('.')[-1] in video_name: 39 | with open('./result.txt', 'w') as f: 40 | f.write(image_name) 41 | 42 | image.save(os.path.join(os.getcwd(), image_name)) 43 | 44 | if image_name.split(".")[-1] in file_name: 45 | img = cv2.imread(image_name) 46 | h,w,_ = img.shape 47 | if h > 2000 or w > 2000: 48 | h = h // 2 49 | w = w // 2 50 | img = cv2.resize(img,(int(w),int(h))) 51 | 52 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 53 | bbox, cls_conf, cls_ids = yolo(img) 54 | from vizer.draw import draw_boxes as db 55 | if bbox is not None: 56 | img = db(img, bbox, cls_ids, cls_conf, class_name_map=class_names) 57 | img = img[:, :, (2, 1, 0)] 58 | _, img_encoded = cv2.imencode('.jpg', img) 59 | response = img_encoded.tobytes() 60 | os.remove(image_name) 61 | try: 62 | return Response(response=response, status=200, mimetype='image/jpg') 63 | except: 64 | return render_template('index1.html') 65 | else: 66 | return render_template('real-time.html') 67 | 68 | class Camera(BaseCamera): 69 | @staticmethod 70 | def frames(): 71 | go = 1 72 | while True: 73 | if go == 1: 74 | with open('./result.txt', 'r') as f: 75 | image_name = f.read() 76 | fi_name = image_name 77 | cam = cv2.VideoCapture(image_name) 78 | g = 0 79 | y = 0 80 | s = 0 81 | c = 0 82 | sum = 0 83 | a = time.time() 84 | go = 0 85 | de_sum = [] 86 | de_sum.append(-1) 87 | fps = int(cam.get(cv2.CAP_PROP_FPS)) // 15 + 1 88 | else: 89 | 90 | with open('./result.txt', 'r') as f: 91 | image_name = f.read() 92 | if image_name != fi_name: 93 | go = 1 94 | continue 95 | b = time.time() - a 96 | 
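# break out of the frame loop once this video has been streaming for more than 150 seconds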
if b > 150: 97 | break 98 | ret,img = cam.read() 99 | if ret: 100 | h, w, _ = img.shape 101 | if h > 2000 or w > 2000: 102 | h = h // 2 103 | w = w // 2 104 | img = cv2.resize(img, (int(w), int(h))) 105 | if CameraParams.gray: 106 | if g == 0: 107 | cam = cv2.VideoCapture(image_name) 108 | g = 1 109 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 110 | while (h > 512 and w > 512): 111 | h = h / 1.2 112 | w = w / 1.2 113 | h = int(h) 114 | w = int(w) 115 | img = cv2.resize(img, (w, h)) 116 | yield cv2.imencode('.jpg', img)[1].tobytes() 117 | elif CameraParams.gaussian: 118 | sum = sum + 1 119 | if sum & fps == 0: 120 | if y == 0: 121 | cam = cv2.VideoCapture(image_name) 122 | y = 1 123 | 124 | im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 125 | bbox_xywh, cls_conf, cls_ids = detector(im) 126 | 127 | mask = cls_ids == 0 128 | new_bbox_xywh = bbox_xywh[mask] 129 | new_bbox_xywh[:, 3:] *= 1.2 130 | 131 | new_cls_conf = cls_conf[mask] 132 | outputs = deepsort.update(new_bbox_xywh, new_cls_conf, im) 133 | if len(outputs) > 0: 134 | bbox_xyxy = outputs[:, :4] 135 | identities = outputs[:, -1] 136 | if -1 in de_sum: 137 | de_sum = [] 138 | else: 139 | for id in identities: 140 | if id not in de_sum: 141 | de_sum.append(id) 142 | 143 | img = draw_boxes(img, bbox_xyxy, identities) 144 | 145 | text = "people " 146 | if -1 in de_sum: 147 | de_sum = [] 148 | if (len(de_sum) > 0): 149 | text = text + str(len(de_sum)) 150 | else: 151 | text = text + str(0) 152 | cv2.putText(img, text, (50, 70), cv2.FONT_HERSHEY_COMPLEX, 3, (250, 250, 0), 8) 153 | while (h > 512 and w > 512): 154 | h = h / 1.2 155 | w = w / 1.2 156 | h = int(h) 157 | w = int(w) 158 | img = cv2.resize(img, (w, h)) 159 | yield cv2.imencode('.jpg', img)[1].tobytes() 160 | 161 | elif CameraParams.sobel: 162 | if s == 0: 163 | cam = cv2.VideoCapture(image_name) 164 | s = 1 165 | img = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5) # x 166 | img = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5) # y 167 | while (h > 512 and w > 512): 168 | h = h / 1.2 169 | w = w / 1.2 170 | h = int(h) 171 | w = int(w) 172 | img = cv2.resize(img, (w, h)) 173 | yield cv2.imencode('.jpg', img)[1].tobytes() 174 | elif CameraParams.canny: 175 | if c == 0: 176 | cam = cv2.VideoCapture(image_name) 177 | c = 1 178 | img = cv2.Canny(img, 100, 200, 3, L2gradient=True) 179 | while (h > 512 and w > 512): 180 | h = h / 1.2 181 | w = w / 1.2 182 | h = int(h) 183 | w = int(w) 184 | img = cv2.resize(img, (w, h)) 185 | yield cv2.imencode('.jpg', img)[1].tobytes() 186 | else: 187 | while (h > 512 and w > 512): 188 | h = h / 1.2 189 | w = w / 1.2 190 | h = int(h) 191 | w = int(w) 192 | img = cv2.resize(img, (w, h)) 193 | yield cv2.imencode('.jpg', img)[1].tobytes() 194 | else: 195 | cam = cv2.VideoCapture(image_name) 196 | class CameraParams(): 197 | 198 | gray = False 199 | gaussian = False 200 | sobel = False 201 | canny = False 202 | def __init__(self, gray, gaussian, sobel, canny): 203 | self.gray = gray 204 | self.gaussian = gaussian 205 | self.sobel = sobel 206 | self.canny = canny 207 | 208 | @app.route('/') 209 | def upload_file(): 210 | return render_template('index1.html') 211 | 212 | @app.route('/cameraParams', methods=['GET', 'POST']) 213 | def cameraParams(): 214 | if request.method == 'GET': 215 | data = { 216 | 'gray': CameraParams.gray, 217 | 'gaussian': CameraParams.gaussian, 218 | 'sobel': CameraParams.sobel, 219 | 'canny': CameraParams.canny, 220 | } 221 | return app.response_class(response=json.dumps(data), 222 | status=200, 223 | mimetype='application/json') 224 | elif request.method 
== 'POST': 225 | try: 226 | data = request.form.to_dict() 227 | CameraParams.gray = str_to_bool(data['gray']) 228 | CameraParams.gaussian = str_to_bool(data['gaussian']) 229 | CameraParams.sobel = str_to_bool(data['sobel']) 230 | CameraParams.canny = str_to_bool(data['canny']) 231 | message = {'message': 'Success'} 232 | response = app.response_class(response=json.dumps(message), 233 | status=200, 234 | mimetype='application/json') 235 | return response 236 | except Exception as e: 237 | print(e) 238 | response = app.response_class(response=json.dumps(e), 239 | status=400, 240 | mimetype='application/json') 241 | return response 242 | else: 243 | data = { "error": "Method not allowed. Please GET or POST request!" } 244 | return app.response_class(response=json.dumps(data), 245 | status=400, 246 | mimetype='application/json') 247 | 248 | @app.route('/realtime') 249 | def realtime(): 250 | return render_template('real-time.html') 251 | 252 | ########get path 253 | @app.route('/video_feed') 254 | def video_feed(): 255 | """Video streaming route. Put this in the src attribute of an img tag.""" 256 | return Response(genWeb(Camera()), 257 | mimetype='multipart/x-mixed-replace; boundary=frame') 258 | def genWeb(camera): 259 | """Video streaming generator function.""" 260 | while True: 261 | frame = camera.get_frame() 262 | yield (b'--frame\r\n' 263 | b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n') 264 | 265 | def str_to_bool(s): 266 | if s == "true": 267 | return True 268 | elif s == "false": 269 | return False 270 | else: 271 | raise ValueError 272 | 273 | if __name__ == '__main__': 274 | # Run locally 275 | app.run(debug=True, host='127.0.0.1', port=5000) 276 | #Run on the server 277 | # app.run(debug=True, host = '0.0.0.0', port=5000) 278 | -------------------------------------------------------------------------------- /cam/__pycache__/base_camera.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/cam/__pycache__/base_camera.cpython-37.pyc -------------------------------------------------------------------------------- /cam/__pycache__/base_camera.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/cam/__pycache__/base_camera.cpython-38.pyc -------------------------------------------------------------------------------- /cam/base_camera.py: -------------------------------------------------------------------------------- 1 | import time 2 | import threading 3 | try: 4 | from greenlet import getcurrent as get_ident 5 | except ImportError: 6 | try: 7 | from thread import get_ident 8 | except ImportError: 9 | from _thread import get_ident 10 | 11 | 12 | class CameraEvent(object): 13 | """An Event-like class that signals all active clients when a new frame is 14 | available. 
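Each client thread registers its own threading.Event; if a client's event stays set for more than a few seconds, the client is assumed to have disconnected and its entry is removed.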
15 | """ 16 | def __init__(self): 17 | self.events = {} 18 | 19 | def wait(self): 20 | """Invoked from each client's thread to wait for the next frame.""" 21 | ident = get_ident() 22 | if ident not in self.events: 23 | # this is a new client 24 | # add an entry for it in the self.events dict 25 | # each entry has two elements, a threading.Event() and a timestamp 26 | self.events[ident] = [threading.Event(), time.time()] 27 | return self.events[ident][0].wait() 28 | 29 | def set(self): 30 | """Invoked by the camera thread when a new frame is available.""" 31 | now = time.time() 32 | remove = None 33 | for ident, event in self.events.items(): 34 | if not event[0].isSet(): 35 | # if this client's event is not set, then set it 36 | # also update the last set timestamp to now 37 | event[0].set() 38 | event[1] = now 39 | else: 40 | # if the client's event is already set, it means the client 41 | # did not process a previous frame 42 | # if the event stays set for more than 5 seconds, then assume 43 | # the client is gone and remove it 44 | if now - event[1] > 5: 45 | remove = ident 46 | if remove: 47 | del self.events[remove] 48 | 49 | def clear(self): 50 | """Invoked from each client's thread after a frame was processed.""" 51 | self.events[get_ident()][0].clear() 52 | 53 | 54 | class BaseCamera(object): 55 | thread = None # background thread that reads frames from camera 56 | frame = None # current frame is stored here by background thread 57 | last_access = 0 # time of last client access to the camera 58 | event = CameraEvent() 59 | 60 | def __init__(self): 61 | """Start the background camera thread if it isn't running yet.""" 62 | if BaseCamera.thread is None: 63 | BaseCamera.last_access = time.time() 64 | 65 | # start background frame thread 66 | BaseCamera.thread = threading.Thread(target=self._thread) 67 | BaseCamera.thread.start() 68 | 69 | # wait until frames are available 70 | while self.get_frame() is None: 71 | time.sleep(0) 72 | 73 | def get_frame(self): 74 | """Return the current camera frame.""" 75 | BaseCamera.last_access = time.time() 76 | 77 | # wait for a signal from the camera thread 78 | BaseCamera.event.wait() 79 | BaseCamera.event.clear() 80 | 81 | return BaseCamera.frame 82 | 83 | @staticmethod 84 | def frames(path): 85 | """"Generator that returns frames from the camera.""" 86 | raise RuntimeError('Must be implemented by subclasses.') 87 | 88 | @classmethod 89 | def _thread(cls): 90 | """Camera background thread.""" 91 | print('Starting camera thread.') 92 | frames_iterator = cls.frames() 93 | for frame in frames_iterator: 94 | BaseCamera.frame = frame 95 | BaseCamera.event.set() # send signal to clients 96 | time.sleep(0) 97 | 98 | # if there hasn't been any clients asking for frames in 99 | # the last 10 seconds then stop the thread 100 | if time.time() - BaseCamera.last_access > 60: 101 | frames_iterator.close() 102 | print('Stopping camera thread due to inactivity.') 103 | break 104 | BaseCamera.thread = None -------------------------------------------------------------------------------- /cam/camera.py: -------------------------------------------------------------------------------- 1 | 2 | from cam.base_camera import BaseCamera 3 | import cv2 4 | import tensorflow as tf 5 | from yolov3_tf2.models import YoloV3 6 | from yolov3_tf2.dataset import transform_images 7 | from yolov3_tf2.utils import draw_outputs 8 | 9 | # customize your API through the following parameters 10 | classes_path = 'coco.names' 11 | weights_path = './weights/yolov3.tf' 12 | tiny = False # set 
to True if using a Yolov3 Tiny model 13 | size = 416 # size images are resized to for model 14 | output_path = './detections/' # path to output folder where images with detections are saved 15 | num_classes = 80 # number of classes in model 16 | 17 | # load in weights and classes 18 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 19 | if len(physical_devices) > 0: 20 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 21 | 22 | 23 | yolo = YoloV3(classes=num_classes) 24 | 25 | yolo.load_weights(weights_path).expect_partial() 26 | print('weights loaded') 27 | 28 | class_names = [c.strip() for c in open(classes_path).readlines()] 29 | print('classes loaded') 30 | 31 | 32 | class Camera(BaseCamera): 33 | 34 | @staticmethod 35 | def frames(): 36 | cam = cv2.VideoCapture(r'./finish.mp4') 37 | if not cam.isOpened(): 38 | raise RuntimeError('Could not start camera.') 39 | 40 | while True: 41 | # read current frame 42 | _, img = cam.read() 43 | try: 44 | if CameraParams.gray: 45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 46 | if CameraParams.gaussian: 47 | img_raw = tf.convert_to_tensor(img) 48 | img_raw = tf.expand_dims(img_raw, 0) 49 | # img detect 50 | img_raw = transform_images(img_raw, size) 51 | boxes, scores, classes, nums = yolo(img_raw) 52 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 53 | img = draw_outputs(img, (boxes, scores, classes, nums), class_names) 54 | if CameraParams.sobel: 55 | if(len(img.shape) == 3): 56 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 57 | img = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5) # x 58 | img = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5) # y 59 | if CameraParams.canny: 60 | img = cv2.Canny(img, 100, 200, 3, L2gradient=True) 61 | except Exception as e: 62 | print(e) 63 | # encode as a jpeg image and return it 64 | yield cv2.imencode('.jpg', img)[1].tobytes() 65 | 66 | class CameraParams(): 67 | 68 | gray = False 69 | gaussian = False 70 | sobel = False 71 | canny = False 72 | def __init__(self, gray, gaussian, sobel, canny, yolo): 73 | self.gray = gray 74 | self.gaussian = gaussian 75 | self.sobel = sobel 76 | self.canny = canny 77 | self.yolo 78 | -------------------------------------------------------------------------------- /cam/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /deepsort/.gitignore: -------------------------------------------------------------------------------- 1 | # Folders 2 
| __pycache__/ 3 | build/ 4 | *.egg-info 5 | 6 | 7 | # Files 8 | *.weights 9 | *.t7 10 | *.mp4 11 | *.avi 12 | *.so 13 | *.txt 14 | -------------------------------------------------------------------------------- /deepsort/.idea/deep_sort_pytorch-master.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 24 | 25 | -------------------------------------------------------------------------------- /deepsort/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /deepsort/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /deepsort/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /deepsort/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Ziqiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /deepsort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/__init__.py -------------------------------------------------------------------------------- /deepsort/configs/deep_sort.yaml: -------------------------------------------------------------------------------- 1 | DEEPSORT: 2 | REID_CKPT: "./deep_sort/deep/checkpoint/ckpt.t7" 3 | MAX_DIST: 0.2 4 | MIN_CONFIDENCE: 0.3 5 | NMS_MAX_OVERLAP: 0.5 6 | MAX_IOU_DISTANCE: 0.7 7 | MAX_AGE: 70 8 | N_INIT: 3 9 | NN_BUDGET: 100 10 | -------------------------------------------------------------------------------- /deepsort/configs/yolov3.yaml: -------------------------------------------------------------------------------- 1 | YOLOV3: 2 | CFG: "./detector/YOLOv3/cfg/yolo_v3.cfg" 3 | WEIGHT: "./detector/YOLOv3/weight/yolov3.weights" 4 | CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names" 5 | 6 | SCORE_THRESH: 0.5 7 | NMS_THRESH: 0.4 8 | -------------------------------------------------------------------------------- /deepsort/configs/yolov3_tiny.yaml: -------------------------------------------------------------------------------- 1 | YOLOV3: 2 | CFG: "./detector/YOLOv3/cfg/yolov3-tiny.cfg" 3 | WEIGHT: "./detector/YOLOv3/weight/yolov3-tiny.weights" 4 | CLASS_NAMES: "./detector/YOLOv3/cfg/coco.names" 5 | 6 | SCORE_THRESH: 0.5 7 | NMS_THRESH: 0.4 -------------------------------------------------------------------------------- /deepsort/deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort 2 | 3 | 4 | __all__ = ['DeepSort', 'build_tracker'] 5 | 6 | 7 | def build_tracker(use_cuda): 8 | return DeepSort('./deepsort/deep_sort/deep/checkpoint/ckpt.t7', 9 | max_dist=0.2, min_confidence=0.3, 10 | nms_max_overlap=0.5, max_iou_distance=0.7, 11 | max_age=70, n_init=3, nn_budget=100, use_cuda=use_cuda) 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/evaluate.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | features = torch.load("features.pth") 4 | qf = features["qf"] 5 | ql = features["ql"] 6 | gf = features["gf"] 7 | gl = features["gl"] 8 | 9 | scores = qf.mm(gf.t()) 10 | res = scores.topk(5, dim=1)[1][:,0] 11 | top1correct = gl[res].eq(ql).sum().item() 12 | 13 | print("Acc top1:{:.3f}".format(top1correct/ql.size(0))) 14 | 15 | 16 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/feature_extractor.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | import logging 6 | 7 | from .model import Net 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" 13 | state_dict = torch.load(model_path, map_location=lambda storage, loc: storage)['net_dict'] 14 | self.net.load_state_dict(state_dict) 15 | logger = logging.getLogger("root.tracker") 16 | logger.info("Loading weights from {}... Done!".format(model_path)) 17 | self.net.to(self.device) 18 | self.size = (64, 128) 19 | self.norm = transforms.Compose([ 20 | transforms.ToTensor(), 21 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 22 | ]) 23 | 24 | 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32)/255., size) 37 | 38 | im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops], dim=0).float() 39 | return im_batch 40 | 41 | 42 | def __call__(self, im_crops): 43 | im_batch = self._preprocess(im_crops) 44 | with torch.no_grad(): 45 | im_batch = im_batch.to(self.device) 46 | features = self.net(im_batch) 47 | return features.cpu().numpy() 48 | 49 | 50 | if __name__ == '__main__': 51 | img = cv2.imread("demo.jpg")[:,:,(2,1,0)] 52 | extr = Extractor("checkpoint/ckpt.t7") 53 | feature = extr(img) 54 | print(feature.shape) 55 | 56 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=751 ,reid=False): 50 | 
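# ReID embedding network: maps a 3x128x64 person crop to a 512-d feature vector (L2-normalised when reid=True)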
super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,64,3,stride=1,padding=1), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | # nn.Conv2d(32,32,3,stride=1,padding=1), 57 | # nn.BatchNorm2d(32), 58 | # nn.ReLU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(64,64,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(64,128,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(128,256,2,True) 67 | # 128 16 8 68 | self.layer4 = make_layers(256,512,2,True) 69 | # 256 8 4 70 | self.avgpool = nn.AvgPool2d((8,4),1) 71 | # 256 1 1 72 | self.reid = reid 73 | self.classifier = nn.Sequential( 74 | nn.Linear(512, 256), 75 | nn.BatchNorm1d(256), 76 | nn.ReLU(inplace=True), 77 | nn.Dropout(), 78 | nn.Linear(256, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | x = self.layer4(x) 87 | x = self.avgpool(x) 88 | x = x.view(x.size(0),-1) 89 | # B x 128 90 | if self.reid: 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | # classifier 94 | x = self.classifier(x) 95 | return x 96 | 97 | 98 | if __name__ == '__main__': 99 | net = Net() 100 | x = torch.randn(4,3,128,64) 101 | y = net(x) 102 | import ipdb; ipdb.set_trace() 103 | 104 | 105 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = 
make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | # 128 16 8 68 | self.dense = nn.Sequential( 69 | nn.Dropout(p=0.6), 70 | nn.Linear(128*16*8, 128), 71 | nn.BatchNorm1d(128), 72 | nn.ELU(inplace=True) 73 | ) 74 | # 256 1 1 75 | self.reid = reid 76 | self.batch_norm = nn.BatchNorm1d(128) 77 | self.classifier = nn.Sequential( 78 | nn.Linear(128, num_classes), 79 | ) 80 | 81 | def forward(self, x): 82 | x = self.conv(x) 83 | x = self.layer1(x) 84 | x = self.layer2(x) 85 | x = self.layer3(x) 86 | 87 | x = x.view(x.size(0),-1) 88 | if self.reid: 89 | x = self.dense[0](x) 90 | x = self.dense[1](x) 91 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 92 | return x 93 | x = self.dense(x) 94 | # B x 128 95 | # classifier 96 | x = self.classifier(x) 97 | return x 98 | 99 | 100 | if __name__ == '__main__': 101 | net = Net(reid=True) 102 | x = torch.randn(4,3,128,64) 103 | y = net(x) 104 | import ipdb; ipdb.set_trace() 105 | 106 | 107 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import torchvision 4 | 5 | import argparse 6 | import os 7 | 8 | from model import Net 9 | 10 | parser = argparse.ArgumentParser(description="Train on market1501") 11 | parser.add_argument("--data-dir",default='data',type=str) 12 | parser.add_argument("--no-cuda",action="store_true") 13 | parser.add_argument("--gpu-id",default=0,type=int) 14 | args = parser.parse_args() 15 | 16 | # device 17 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 18 | if torch.cuda.is_available() and not args.no_cuda: 19 | cudnn.benchmark = True 20 | 21 | # data loader 22 | root = args.data_dir 23 | query_dir = os.path.join(root,"query") 24 | gallery_dir = os.path.join(root,"gallery") 25 | transform = torchvision.transforms.Compose([ 26 | torchvision.transforms.Resize((128,64)), 27 | torchvision.transforms.ToTensor(), 28 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 29 | ]) 30 | queryloader = torch.utils.data.DataLoader( 31 | torchvision.datasets.ImageFolder(query_dir, transform=transform), 32 | batch_size=64, shuffle=False 33 | ) 34 | galleryloader = torch.utils.data.DataLoader( 35 | torchvision.datasets.ImageFolder(gallery_dir, transform=transform), 36 | batch_size=64, shuffle=False 37 | ) 38 | 39 | # net definition 40 | net = Net(reid=True) 41 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 
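# the checkpoint stores the trained ReID weights under the 'net_dict' key (see train.py)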
42 | print('Loading from checkpoint/ckpt.t7') 43 | checkpoint = torch.load("./checkpoint/ckpt.t7") 44 | net_dict = checkpoint['net_dict'] 45 | net.load_state_dict(net_dict, strict=False) 46 | net.eval() 47 | net.to(device) 48 | 49 | # compute features 50 | query_features = torch.tensor([]).float() 51 | query_labels = torch.tensor([]).long() 52 | gallery_features = torch.tensor([]).float() 53 | gallery_labels = torch.tensor([]).long() 54 | 55 | with torch.no_grad(): 56 | for idx,(inputs,labels) in enumerate(queryloader): 57 | inputs = inputs.to(device) 58 | features = net(inputs).cpu() 59 | query_features = torch.cat((query_features, features), dim=0) 60 | query_labels = torch.cat((query_labels, labels)) 61 | 62 | for idx,(inputs,labels) in enumerate(galleryloader): 63 | inputs = inputs.to(device) 64 | features = net(inputs).cpu() 65 | gallery_features = torch.cat((gallery_features, features), dim=0) 66 | gallery_labels = torch.cat((gallery_labels, labels)) 67 | 68 | gallery_labels -= 2 69 | 70 | # save features 71 | features = { 72 | "qf": query_features, 73 | "ql": query_labels, 74 | "gf": gallery_features, 75 | "gl": gallery_labels 76 | } 77 | torch.save(features,"features.pth") -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/train.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/deep_sort/deep/train.jpg -------------------------------------------------------------------------------- /deepsort/deep_sort/deep/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | import torch 8 | import torch.backends.cudnn as cudnn 9 | import torchvision 10 | 11 | from model import Net 12 | 13 | parser = argparse.ArgumentParser(description="Train on market1501") 14 | parser.add_argument("--data-dir",default='data',type=str) 15 | parser.add_argument("--no-cuda",action="store_true") 16 | parser.add_argument("--gpu-id",default=0,type=int) 17 | parser.add_argument("--lr",default=0.1, type=float) 18 | parser.add_argument("--interval",'-i',default=20,type=int) 19 | parser.add_argument('--resume', '-r',action='store_true') 20 | args = parser.parse_args() 21 | 22 | # device 23 | device = "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu" 24 | if torch.cuda.is_available() and not args.no_cuda: 25 | cudnn.benchmark = True 26 | 27 | # data loading 28 | root = args.data_dir 29 | train_dir = os.path.join(root,"train") 30 | test_dir = os.path.join(root,"test") 31 | transform_train = torchvision.transforms.Compose([ 32 | torchvision.transforms.RandomCrop((128,64),padding=4), 33 | torchvision.transforms.RandomHorizontalFlip(), 34 | torchvision.transforms.ToTensor(), 35 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 36 | ]) 37 | transform_test = torchvision.transforms.Compose([ 38 | torchvision.transforms.Resize((128,64)), 39 | torchvision.transforms.ToTensor(), 40 | torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) 41 | ]) 42 | trainloader = torch.utils.data.DataLoader( 43 | torchvision.datasets.ImageFolder(train_dir, transform=transform_train), 44 | batch_size=64,shuffle=True 45 | ) 46 | testloader = torch.utils.data.DataLoader( 47 | 
torchvision.datasets.ImageFolder(test_dir, transform=transform_test), 48 | batch_size=64,shuffle=True 49 | ) 50 | num_classes = max(len(trainloader.dataset.classes), len(testloader.dataset.classes)) 51 | 52 | # net definition 53 | start_epoch = 0 54 | net = Net(num_classes=num_classes) 55 | if args.resume: 56 | assert os.path.isfile("./checkpoint/ckpt.t7"), "Error: no checkpoint file found!" 57 | print('Loading from checkpoint/ckpt.t7') 58 | checkpoint = torch.load("./checkpoint/ckpt.t7") 59 | # import ipdb; ipdb.set_trace() 60 | net_dict = checkpoint['net_dict'] 61 | net.load_state_dict(net_dict) 62 | best_acc = checkpoint['acc'] 63 | start_epoch = checkpoint['epoch'] 64 | net.to(device) 65 | 66 | # loss and optimizer 67 | criterion = torch.nn.CrossEntropyLoss() 68 | optimizer = torch.optim.SGD(net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4) 69 | best_acc = 0. 70 | 71 | # train function for each epoch 72 | def train(epoch): 73 | print("\nEpoch : %d"%(epoch+1)) 74 | net.train() 75 | training_loss = 0. 76 | train_loss = 0. 77 | correct = 0 78 | total = 0 79 | interval = args.interval 80 | start = time.time() 81 | for idx, (inputs, labels) in enumerate(trainloader): 82 | # forward 83 | inputs,labels = inputs.to(device),labels.to(device) 84 | outputs = net(inputs) 85 | loss = criterion(outputs, labels) 86 | 87 | # backward 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | 92 | # accumurating 93 | training_loss += loss.item() 94 | train_loss += loss.item() 95 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 96 | total += labels.size(0) 97 | 98 | # print 99 | if (idx+1)%interval == 0: 100 | end = time.time() 101 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 102 | 100.*(idx+1)/len(trainloader), end-start, training_loss/interval, correct, total, 100.*correct/total 103 | )) 104 | training_loss = 0. 105 | start = time.time() 106 | 107 | return train_loss/len(trainloader), 1.- correct/total 108 | 109 | def test(epoch): 110 | global best_acc 111 | net.eval() 112 | test_loss = 0. 
113 | correct = 0 114 | total = 0 115 | start = time.time() 116 | with torch.no_grad(): 117 | for idx, (inputs, labels) in enumerate(testloader): 118 | inputs, labels = inputs.to(device), labels.to(device) 119 | outputs = net(inputs) 120 | loss = criterion(outputs, labels) 121 | 122 | test_loss += loss.item() 123 | correct += outputs.max(dim=1)[1].eq(labels).sum().item() 124 | total += labels.size(0) 125 | 126 | print("Testing ...") 127 | end = time.time() 128 | print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format( 129 | 100.*(idx+1)/len(testloader), end-start, test_loss/len(testloader), correct, total, 100.*correct/total 130 | )) 131 | 132 | # saving checkpoint 133 | acc = 100.*correct/total 134 | if acc > best_acc: 135 | best_acc = acc 136 | print("Saving parameters to checkpoint/ckpt.t7") 137 | checkpoint = { 138 | 'net_dict':net.state_dict(), 139 | 'acc':acc, 140 | 'epoch':epoch, 141 | } 142 | if not os.path.isdir('checkpoint'): 143 | os.mkdir('checkpoint') 144 | torch.save(checkpoint, './checkpoint/ckpt.t7') 145 | 146 | return test_loss/len(testloader), 1.- correct/total 147 | 148 | # plot figure 149 | x_epoch = [] 150 | record = {'train_loss':[], 'train_err':[], 'test_loss':[], 'test_err':[]} 151 | fig = plt.figure() 152 | ax0 = fig.add_subplot(121, title="loss") 153 | ax1 = fig.add_subplot(122, title="top1err") 154 | def draw_curve(epoch, train_loss, train_err, test_loss, test_err): 155 | global record 156 | record['train_loss'].append(train_loss) 157 | record['train_err'].append(train_err) 158 | record['test_loss'].append(test_loss) 159 | record['test_err'].append(test_err) 160 | 161 | x_epoch.append(epoch) 162 | ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train') 163 | ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val') 164 | ax1.plot(x_epoch, record['train_err'], 'bo-', label='train') 165 | ax1.plot(x_epoch, record['test_err'], 'ro-', label='val') 166 | if epoch == 0: 167 | ax0.legend() 168 | ax1.legend() 169 | fig.savefig("train.jpg") 170 | 171 | # lr decay 172 | def lr_decay(): 173 | global optimizer 174 | for params in optimizer.param_groups: 175 | params['lr'] *= 0.1 176 | lr = params['lr'] 177 | print("Learning rate adjusted to {}".format(lr)) 178 | 179 | def main(): 180 | for epoch in range(start_epoch, start_epoch+40): 181 | train_loss, train_err = train(epoch) 182 | test_loss, test_err = test(epoch) 183 | draw_curve(epoch, train_loss, train_err, test_loss, test_err) 184 | if (epoch+1)%20==0: 185 | lr_decay() 186 | 187 | 188 | if __name__ == '__main__': 189 | main() 190 | -------------------------------------------------------------------------------- /deepsort/deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from .deep.feature_extractor import Extractor 4 | from .sort.nn_matching import NearestNeighborDistanceMetric 5 | from .sort.preprocessing import non_max_suppression 6 | from .sort.detection import Detection 7 | from .sort.tracker import Tracker 8 | __all__ = ['DeepSort'] 9 | class DeepSort(object): 10 | def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=7000, n_init=3, nn_budget=100, use_cuda=True): 11 | self.min_confidence = min_confidence 12 | self.nms_max_overlap = nms_max_overlap 13 | self.extractor = Extractor(model_path, use_cuda=use_cuda) 14 | max_cosine_distance = max_dist 15 | nn_budget = 100 16 | metric = NearestNeighborDistanceMetric("cosine", 
max_cosine_distance, nn_budget) 17 | self.tracker = Tracker(metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) 18 | def update(self, bbox_xywh, confidences, ori_img): 19 | self.height, self.width = ori_img.shape[:2] 20 | # generate detections 21 | features = self._get_features(bbox_xywh, ori_img) 22 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 23 | detections = [Detection(bbox_tlwh[i], conf, features[i]) for i,conf in enumerate(confidences) if conf>self.min_confidence] 24 | boxes = np.array([d.tlwh for d in detections]) 25 | scores = np.array([d.confidence for d in detections]) 26 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 27 | detections = [detections[i] for i in indices] 28 | self.tracker.predict() 29 | self.tracker.update(detections) 30 | outputs = [] 31 | for track in self.tracker.tracks: 32 | if not track.is_confirmed() or track.time_since_update > 1: 33 | continue 34 | box = track.to_tlwh() 35 | x1,y1,x2,y2 = self._tlwh_to_xyxy(box) 36 | track_id = track.track_id 37 | outputs.append(np.array([x1,y1,x2,y2,track_id], dtype=np.int)) 38 | if len(outputs) > 0: 39 | outputs = np.stack(outputs,axis=0) 40 | return outputs 41 | @staticmethod 42 | def _xywh_to_tlwh(bbox_xywh): 43 | if isinstance(bbox_xywh, np.ndarray): 44 | bbox_tlwh = bbox_xywh.copy() 45 | elif isinstance(bbox_xywh, torch.Tensor): 46 | bbox_tlwh = bbox_xywh.clone() 47 | bbox_tlwh[:,0] = bbox_xywh[:,0] - bbox_xywh[:,2]/2. 48 | bbox_tlwh[:,1] = bbox_xywh[:,1] - bbox_xywh[:,3]/2. 49 | return bbox_tlwh 50 | def _xywh_to_xyxy(self, bbox_xywh): 51 | x,y,w,h = bbox_xywh 52 | x1 = max(int(x-w/2),0) 53 | x2 = min(int(x+w/2),self.width-1) 54 | y1 = max(int(y-h/2),0) 55 | y2 = min(int(y+h/2),self.height-1) 56 | return x1,y1,x2,y2 57 | def _tlwh_to_xyxy(self, bbox_tlwh): 58 | x,y,w,h = bbox_tlwh 59 | x1 = max(int(x),0) 60 | x2 = min(int(x+w),self.width-1) 61 | y1 = max(int(y),0) 62 | y2 = min(int(y+h),self.height-1) 63 | return x1,y1,x2,y2 64 | def _xyxy_to_tlwh(self, bbox_xyxy): 65 | x1,y1,x2,y2 = bbox_xyxy 66 | t = x1 67 | l = y1 68 | w = int(x2-x1) 69 | h = int(y2-y1) 70 | return t,l,w,h 71 | def _get_features(self, bbox_xywh, ori_img): 72 | im_crops = [] 73 | for box in bbox_xywh: 74 | x1,y1,x2,y2 = self._xywh_to_xyxy(box) 75 | im = ori_img[y1:y2,x1:x2] 76 | im_crops.append(im) 77 | if im_crops: 78 | features = self.extractor(im_crops) 79 | else: 80 | features = np.array([]) 81 | return features -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 
22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
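Tracks that were not updated in the previous frame (time_since_update > 1) get INFTY_COST in every column, so IoU matching skips them.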
65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 
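The mean is propagated with the constant-velocity motion matrix, and process noise scaled by the box height is added to the propagated covariance.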
90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 
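As a rough usage sketch (assuming the `KalmanFilter` class and the `chi2inv95` table defined above; the numbers are illustrative):

```python
import numpy as np

kf = KalmanFilter()
# Start a track from a first (x, y, a, h) measurement, then predict one step.
mean, cov = kf.initiate(np.array([50., 60., 0.5, 120.]))
mean, cov = kf.predict(mean, cov)

# Gate two candidate measurements with the 0.95 chi-square threshold (4 dof).
measurements = np.array([[52., 61., 0.5, 118.],      # close to the prediction
                         [300., 400., 0.5, 120.]])   # far from the prediction
d2 = kf.gating_distance(mean, cov, measurements)
feasible = d2 <= chi2inv95[4]

# A gated measurement can then be used for the correction step.
mean, cov = kf.update(mean, cov, measurements[0])
```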
195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | # from sklearn.utils.linear_assignment_ import linear_assignment 5 | from scipy.optimize import linear_sum_assignment as linear_assignment 6 | from . import kalman_filter 7 | 8 | 9 | INFTY_COST = 1e+5 10 | 11 | 12 | def min_cost_matching( 13 | distance_metric, max_distance, tracks, detections, track_indices=None, 14 | detection_indices=None): 15 | """Solve linear assignment problem. 16 | 17 | Parameters 18 | ---------- 19 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 20 | The distance metric is given a list of tracks and detections as well as 21 | a list of N track indices and M detection indices. The metric should 22 | return the NxM dimensional cost matrix, where element (i, j) is the 23 | association cost between the i-th track in the given track indices and 24 | the j-th detection in the given detection_indices. 25 | max_distance : float 26 | Gating threshold. Associations with cost larger than this value are 27 | disregarded. 28 | tracks : List[track.Track] 29 | A list of predicted tracks at the current time step. 30 | detections : List[detection.Detection] 31 | A list of detections at the current time step. 32 | track_indices : List[int] 33 | List of track indices that maps rows in `cost_matrix` to tracks in 34 | `tracks` (see description above). 35 | detection_indices : List[int] 36 | List of detection indices that maps columns in `cost_matrix` to 37 | detections in `detections` (see description above). 38 | 39 | Returns 40 | ------- 41 | (List[(int, int)], List[int], List[int]) 42 | Returns a tuple with the following three entries: 43 | * A list of matched track and detection indices. 44 | * A list of unmatched track indices. 45 | * A list of unmatched detection indices. 
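A minimal sketch of how this solver is typically called with the IoU metric (here `tracks` and `detections` are assumed to be the usual `Track` and `Detection` lists; 0.7 mirrors the tracker's default `max_iou_distance`):

```python
from . import iou_matching  # same relative-import style used in this package

matches, unmatched_tracks, unmatched_detections = min_cost_matching(
    iou_matching.iou_cost, 0.7, tracks, detections)
```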
46 | 47 | """ 48 | if track_indices is None: 49 | track_indices = np.arange(len(tracks)) 50 | if detection_indices is None: 51 | detection_indices = np.arange(len(detections)) 52 | 53 | if len(detection_indices) == 0 or len(track_indices) == 0: 54 | return [], track_indices, detection_indices # Nothing to match. 55 | 56 | cost_matrix = distance_metric( 57 | tracks, detections, track_indices, detection_indices) 58 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 59 | 60 | row_indices, col_indices = linear_assignment(cost_matrix) 61 | 62 | matches, unmatched_tracks, unmatched_detections = [], [], [] 63 | for col, detection_idx in enumerate(detection_indices): 64 | if col not in col_indices: 65 | unmatched_detections.append(detection_idx) 66 | for row, track_idx in enumerate(track_indices): 67 | if row not in row_indices: 68 | unmatched_tracks.append(track_idx) 69 | for row, col in zip(row_indices, col_indices): 70 | track_idx = track_indices[row] 71 | detection_idx = detection_indices[col] 72 | if cost_matrix[row, col] > max_distance: 73 | unmatched_tracks.append(track_idx) 74 | unmatched_detections.append(detection_idx) 75 | else: 76 | matches.append((track_idx, detection_idx)) 77 | return matches, unmatched_tracks, unmatched_detections 78 | 79 | 80 | def matching_cascade( 81 | distance_metric, max_distance, cascade_depth, tracks, detections, 82 | track_indices=None, detection_indices=None): 83 | """Run matching cascade. 84 | 85 | Parameters 86 | ---------- 87 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 88 | The distance metric is given a list of tracks and detections as well as 89 | a list of N track indices and M detection indices. The metric should 90 | return the NxM dimensional cost matrix, where element (i, j) is the 91 | association cost between the i-th track in the given track indices and 92 | the j-th detection in the given detection indices. 93 | max_distance : float 94 | Gating threshold. Associations with cost larger than this value are 95 | disregarded. 96 | cascade_depth: int 97 | The cascade depth, should be se to the maximum track age. 98 | tracks : List[track.Track] 99 | A list of predicted tracks at the current time step. 100 | detections : List[detection.Detection] 101 | A list of detections at the current time step. 102 | track_indices : Optional[List[int]] 103 | List of track indices that maps rows in `cost_matrix` to tracks in 104 | `tracks` (see description above). Defaults to all tracks. 105 | detection_indices : Optional[List[int]] 106 | List of detection indices that maps columns in `cost_matrix` to 107 | detections in `detections` (see description above). Defaults to all 108 | detections. 109 | 110 | Returns 111 | ------- 112 | (List[(int, int)], List[int], List[int]) 113 | Returns a tuple with the following three entries: 114 | * A list of matched track and detection indices. 115 | * A list of unmatched track indices. 116 | * A list of unmatched detection indices. 
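For reference, the `Tracker` defined later in this package invokes the cascade roughly as follows (a sketch using the names from `tracker.py`):

```python
matches_a, unmatched_tracks_a, unmatched_detections = matching_cascade(
    gated_metric, metric.matching_threshold, max_age,
    tracks, detections, confirmed_tracks)
```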
117 | 118 | """ 119 | if track_indices is None: 120 | track_indices = list(range(len(tracks))) 121 | if detection_indices is None: 122 | detection_indices = list(range(len(detections))) 123 | 124 | unmatched_detections = detection_indices 125 | matches = [] 126 | for level in range(cascade_depth): 127 | if len(unmatched_detections) == 0: # No detections left 128 | break 129 | 130 | track_indices_l = [ 131 | k for k in track_indices 132 | if tracks[k].time_since_update == 1 + level 133 | ] 134 | if len(track_indices_l) == 0: # Nothing to match at this level 135 | continue 136 | 137 | matches_l, _, unmatched_detections = \ 138 | min_cost_matching( 139 | distance_metric, max_distance, tracks, detections, 140 | track_indices_l, unmatched_detections) 141 | matches += matches_l 142 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 143 | return matches, unmatched_tracks, unmatched_detections 144 | 145 | 146 | def gate_cost_matrix( 147 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 148 | gated_cost=INFTY_COST, only_position=False): 149 | """Invalidate infeasible entries in cost matrix based on the state 150 | distributions obtained by Kalman filtering. 151 | 152 | Parameters 153 | ---------- 154 | kf : The Kalman filter. 155 | cost_matrix : ndarray 156 | The NxM dimensional cost matrix, where N is the number of track indices 157 | and M is the number of detection indices, such that entry (i, j) is the 158 | association cost between `tracks[track_indices[i]]` and 159 | `detections[detection_indices[j]]`. 160 | tracks : List[track.Track] 161 | A list of predicted tracks at the current time step. 162 | detections : List[detection.Detection] 163 | A list of detections at the current time step. 164 | track_indices : List[int] 165 | List of track indices that maps rows in `cost_matrix` to tracks in 166 | `tracks` (see description above). 167 | detection_indices : List[int] 168 | List of detection indices that maps columns in `cost_matrix` to 169 | detections in `detections` (see description above). 170 | gated_cost : Optional[float] 171 | Entries in the cost matrix corresponding to infeasible associations are 172 | set this value. Defaults to a very large value. 173 | only_position : Optional[bool] 174 | If True, only the x, y position of the state distribution is considered 175 | during gating. Defaults to False. 176 | 177 | Returns 178 | ------- 179 | ndarray 180 | Returns the modified cost matrix. 181 | 182 | """ 183 | gating_dim = 2 if only_position else 4 184 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 185 | measurements = np.asarray( 186 | [detections[i].to_xyah() for i in detection_indices]) 187 | for row, track_idx in enumerate(track_indices): 188 | track = tracks[track_idx] 189 | gating_distance = kf.gating_distance( 190 | track.mean, track.covariance, measurements, only_position) 191 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 192 | return cost_matrix 193 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that element (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to length 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that element (i, j) 48 | contains the cosine distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest squared Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far.
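A small usage sketch of the metric (toy 4-dimensional features; the 0.2 threshold and 100 budget are only illustrative values):

```python
import numpy as np

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)

# Register one appearance feature for each of two targets.
features = np.array([[1., 0., 0., 0.],
                     [0., 1., 0., 0.]])
targets = np.array([1, 2])
metric.partial_fit(features, targets, active_targets=[1, 2])

# Cost of associating a new detection feature with each known target;
# smaller values mean a closer appearance match.
query = np.array([[0.9, 0.1, 0., 0.]])
cost = metric.distance(query, targets=[1, 2])   # shape (2, 1)
```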
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 
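A concrete toy call (illustrative values) showing the behaviour on two heavily overlapping boxes and one distant box:

```python
import numpy as np

boxes = np.array([[10, 10, 20, 20],     # (x, y, width, height)
                  [12, 12, 20, 20],     # overlaps the first box heavily
                  [100, 100, 20, 20]])  # far away from both
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
# keep == [0, 2]: the lower-scoring overlapping box (index 1) is suppressed.
```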
35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float64) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurrence. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list.
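In short, the lifecycle can be sketched as follows (here `kf` is a `KalmanFilter`, `det` a matched `Detection`, and `mean`/`covariance` come from `kf.initiate`; `n_init=3` and `max_age=70` are illustrative values):

```python
track = Track(mean, covariance, track_id=1, n_init=3, max_age=70, feature=det.feature)
track.is_tentative()        # True: fewer than n_init hits so far

for _ in range(3):
    track.predict(kf)       # age += 1, time_since_update += 1
    track.update(kf, det)   # Kalman correction, hits += 1, feature cached

track.is_confirmed()        # True once hits >= n_init
track.mark_missed()         # deletes a tentative track immediately; a confirmed
                            # track is deleted once time_since_update > max_age
```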
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | 83 | def to_tlwh(self): 84 | """Get current position in bounding box format `(top left x, top left y, 85 | width, height)`. 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | The bounding box. 91 | 92 | """ 93 | ret = self.mean[:4].copy() 94 | ret[2] *= ret[3] 95 | ret[:2] -= ret[2:] / 2 96 | return ret 97 | 98 | def to_tlbr(self): 99 | """Get current position in bounding box format `(min x, miny, max x, 100 | max y)`. 101 | 102 | Returns 103 | ------- 104 | ndarray 105 | The bounding box. 106 | 107 | """ 108 | ret = self.to_tlwh() 109 | ret[2:] = ret[:2] + ret[2:] 110 | return ret 111 | 112 | def predict(self, kf): 113 | """Propagate the state distribution to the current time step using a 114 | Kalman filter prediction step. 115 | 116 | Parameters 117 | ---------- 118 | kf : kalman_filter.KalmanFilter 119 | The Kalman filter. 120 | 121 | """ 122 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 123 | self.age += 1 124 | self.time_since_update += 1 125 | 126 | def update(self, kf, detection): 127 | """Perform Kalman filter measurement update step and update the feature 128 | cache. 129 | 130 | Parameters 131 | ---------- 132 | kf : kalman_filter.KalmanFilter 133 | The Kalman filter. 134 | detection : Detection 135 | The associated detection. 136 | 137 | """ 138 | self.mean, self.covariance = kf.update( 139 | self.mean, self.covariance, detection.to_xyah()) 140 | self.features.append(detection.feature) 141 | 142 | self.hits += 1 143 | self.time_since_update = 0 144 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 145 | self.state = TrackState.Confirmed 146 | 147 | def mark_missed(self): 148 | """Mark this track as missed (no association at the current time step). 149 | """ 150 | if self.state == TrackState.Tentative: 151 | self.state = TrackState.Deleted 152 | elif self.time_since_update > self._max_age: 153 | self.state = TrackState.Deleted 154 | 155 | def is_tentative(self): 156 | """Returns True if this track is tentative (unconfirmed). 157 | """ 158 | return self.state == TrackState.Tentative 159 | 160 | def is_confirmed(self): 161 | """Returns True if this track is confirmed.""" 162 | return self.state == TrackState.Confirmed 163 | 164 | def is_deleted(self): 165 | """Returns True if this track is dead and should be deleted.""" 166 | return self.state == TrackState.Deleted 167 | -------------------------------------------------------------------------------- /deepsort/deep_sort/sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 
18 | max_age : int 19 | Maximum number of consecutive misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of consecutive misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=7000, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | self.tracks.append(Track( 136 | mean, covariance, self._next_id, self.n_init, self.max_age, 137 | detection.feature)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/README.md: -------------------------------------------------------------------------------- 1 | # YOLOv3 for detection 2 | 3 | This is an implemention of YOLOv3 with only the forward part. 4 | 5 | If you want to train YOLOv3 on your custom dataset, please search `YOLOv3` on github. 6 | 7 | ## Quick forward 8 | ```bash 9 | cd YOLOv3 10 | python 11 | ``` -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append("detector/YOLOv3") 3 | 4 | 5 | from detector import YOLOv3 6 | __all__ = ['YOLOv3'] 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .yolo_utils import convert2cpu 3 | 4 | 5 | def parse_cfg(cfgfile): 6 | blocks = [] 7 | fp = open(cfgfile) 8 | block = None 9 | line = fp.readline() 10 | while line != '': 11 | line = line.rstrip() 12 | if line == '' or line[0] == '#': 13 | line = fp.readline() 14 | continue 15 | elif line[0] == '[': 16 | if block: 17 | blocks.append(block) 18 | block = dict() 19 | block['type'] = line.lstrip('[').rstrip(']') 20 | # set default value 21 | if block['type'] == 'convolutional': 22 | block['batch_normalize'] = 0 23 | else: 24 | key, value = line.split('=') 25 | key = key.strip() 26 | if key == 'type': 27 | key = '_type' 28 | value = value.strip() 29 | block[key] = value 30 | line = fp.readline() 31 | 32 | if block: 33 | blocks.append(block) 34 | fp.close() 35 | return blocks 36 | 37 | 38 | def print_cfg(blocks): 39 | print('layer filters size input output'); 40 | prev_width = 416 41 | prev_height = 416 42 | prev_filters = 3 43 | out_filters = [] 44 | out_widths = [] 45 | out_heights = [] 46 | ind = -2 47 | for block in blocks: 48 | ind += 1 49 | if block['type'] == 'net': 50 | prev_width = int(block['width']) 51 | prev_height = int(block['height']) 52 | continue 53 | elif block['type'] == 'convolutional': 54 | filters = int(block['filters']) 55 | kernel_size = int(block['size']) 56 | 
stride = int(block['stride']) 57 | is_pad = int(block['pad']) 58 | pad = (kernel_size - 1) // 2 if is_pad else 0 59 | width = (prev_width + 2 * pad - kernel_size) // stride + 1 60 | height = (prev_height + 2 * pad - kernel_size) // stride + 1 61 | print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 62 | ind, 'conv', filters, kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, 63 | height, filters)) 64 | prev_width = width 65 | prev_height = height 66 | prev_filters = filters 67 | out_widths.append(prev_width) 68 | out_heights.append(prev_height) 69 | out_filters.append(prev_filters) 70 | elif block['type'] == 'maxpool': 71 | pool_size = int(block['size']) 72 | stride = int(block['stride']) 73 | width = prev_width // stride 74 | height = prev_height // stride 75 | print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 76 | ind, 'max', pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, filters)) 77 | prev_width = width 78 | prev_height = height 79 | prev_filters = filters 80 | out_widths.append(prev_width) 81 | out_heights.append(prev_height) 82 | out_filters.append(prev_filters) 83 | elif block['type'] == 'avgpool': 84 | width = 1 85 | height = 1 86 | print('%5d %-6s %3d x %3d x%4d -> %3d' % ( 87 | ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) 88 | prev_width = width 89 | prev_height = height 90 | prev_filters = filters 91 | out_widths.append(prev_width) 92 | out_heights.append(prev_height) 93 | out_filters.append(prev_filters) 94 | elif block['type'] == 'softmax': 95 | print('%5d %-6s -> %3d' % (ind, 'softmax', prev_filters)) 96 | out_widths.append(prev_width) 97 | out_heights.append(prev_height) 98 | out_filters.append(prev_filters) 99 | elif block['type'] == 'cost': 100 | print('%5d %-6s -> %3d' % (ind, 'cost', prev_filters)) 101 | out_widths.append(prev_width) 102 | out_heights.append(prev_height) 103 | out_filters.append(prev_filters) 104 | elif block['type'] == 'reorg': 105 | stride = int(block['stride']) 106 | filters = stride * stride * prev_filters 107 | width = prev_width // stride 108 | height = prev_height // stride 109 | print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 110 | ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) 111 | prev_width = width 112 | prev_height = height 113 | prev_filters = filters 114 | out_widths.append(prev_width) 115 | out_heights.append(prev_height) 116 | out_filters.append(prev_filters) 117 | elif block['type'] == 'upsample': 118 | stride = int(block['stride']) 119 | filters = prev_filters 120 | width = prev_width * stride 121 | height = prev_height * stride 122 | print('%5d %-6s * %d %3d x %3d x%4d -> %3d x %3d x%4d' % ( 123 | ind, 'upsample', stride, prev_width, prev_height, prev_filters, width, height, filters)) 124 | prev_width = width 125 | prev_height = height 126 | prev_filters = filters 127 | out_widths.append(prev_width) 128 | out_heights.append(prev_height) 129 | out_filters.append(prev_filters) 130 | elif block['type'] == 'route': 131 | layers = block['layers'].split(',') 132 | layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers] 133 | if len(layers) == 1: 134 | print('%5d %-6s %d' % (ind, 'route', layers[0])) 135 | prev_width = out_widths[layers[0]] 136 | prev_height = out_heights[layers[0]] 137 | prev_filters = out_filters[layers[0]] 138 | elif len(layers) == 2: 139 | print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) 140 | prev_width = out_widths[layers[0]] 
141 | prev_height = out_heights[layers[0]] 142 | assert (prev_width == out_widths[layers[1]]) 143 | assert (prev_height == out_heights[layers[1]]) 144 | prev_filters = out_filters[layers[0]] + out_filters[layers[1]] 145 | out_widths.append(prev_width) 146 | out_heights.append(prev_height) 147 | out_filters.append(prev_filters) 148 | elif block['type'] in ['region', 'yolo']: 149 | print('%5d %-6s' % (ind, 'detection')) 150 | out_widths.append(prev_width) 151 | out_heights.append(prev_height) 152 | out_filters.append(prev_filters) 153 | elif block['type'] == 'shortcut': 154 | from_id = int(block['from']) 155 | from_id = from_id if from_id > 0 else from_id + ind 156 | print('%5d %-6s %d' % (ind, 'shortcut', from_id)) 157 | prev_width = out_widths[from_id] 158 | prev_height = out_heights[from_id] 159 | prev_filters = out_filters[from_id] 160 | out_widths.append(prev_width) 161 | out_heights.append(prev_height) 162 | out_filters.append(prev_filters) 163 | elif block['type'] == 'connected': 164 | filters = int(block['output']) 165 | print('%5d %-6s %d -> %3d' % (ind, 'connected', prev_filters, filters)) 166 | prev_filters = filters 167 | out_widths.append(1) 168 | out_heights.append(1) 169 | out_filters.append(prev_filters) 170 | else: 171 | print('unknown type %s' % (block['type'])) 172 | 173 | 174 | def load_conv(buf, start, conv_model): 175 | num_w = conv_model.weight.numel() 176 | num_b = conv_model.bias.numel() 177 | # print("start: {}, num_w: {}, num_b: {}".format(start, num_w, num_b)) 178 | # by ysyun, use .view_as() 179 | conv_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b]).view_as(conv_model.bias.data)); 180 | start = start + num_b 181 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 182 | start = start + num_w 183 | return start 184 | 185 | 186 | def save_conv(fp, conv_model): 187 | if conv_model.bias.is_cuda: 188 | convert2cpu(conv_model.bias.data).numpy().tofile(fp) 189 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 190 | else: 191 | conv_model.bias.data.numpy().tofile(fp) 192 | conv_model.weight.data.numpy().tofile(fp) 193 | 194 | 195 | def load_conv_bn(buf, start, conv_model, bn_model): 196 | num_w = conv_model.weight.numel() 197 | num_b = bn_model.bias.numel() 198 | bn_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 199 | start = start + num_b 200 | bn_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_b])); 201 | start = start + num_b 202 | bn_model.running_mean.copy_(torch.from_numpy(buf[start:start + num_b])); 203 | start = start + num_b 204 | bn_model.running_var.copy_(torch.from_numpy(buf[start:start + num_b])); 205 | start = start + num_b 206 | # conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])); start = start + num_w 207 | conv_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w]).view_as(conv_model.weight.data)); 208 | start = start + num_w 209 | return start 210 | 211 | 212 | def save_conv_bn(fp, conv_model, bn_model): 213 | if bn_model.bias.is_cuda: 214 | convert2cpu(bn_model.bias.data).numpy().tofile(fp) 215 | convert2cpu(bn_model.weight.data).numpy().tofile(fp) 216 | convert2cpu(bn_model.running_mean).numpy().tofile(fp) 217 | convert2cpu(bn_model.running_var).numpy().tofile(fp) 218 | convert2cpu(conv_model.weight.data).numpy().tofile(fp) 219 | else: 220 | bn_model.bias.data.numpy().tofile(fp) 221 | bn_model.weight.data.numpy().tofile(fp) 222 | bn_model.running_mean.numpy().tofile(fp) 223 | 
bn_model.running_var.numpy().tofile(fp) 224 | conv_model.weight.data.numpy().tofile(fp) 225 | 226 | 227 | def load_fc(buf, start, fc_model): 228 | num_w = fc_model.weight.numel() 229 | num_b = fc_model.bias.numel() 230 | fc_model.bias.data.copy_(torch.from_numpy(buf[start:start + num_b])); 231 | start = start + num_b 232 | fc_model.weight.data.copy_(torch.from_numpy(buf[start:start + num_w])); 233 | start = start + num_w 234 | return start 235 | 236 | 237 | def save_fc(fp, fc_model): 238 | fc_model.bias.data.numpy().tofile(fp) 239 | fc_model.weight.data.numpy().tofile(fp) 240 | 241 | 242 | if __name__ == '__main__': 243 | import sys 244 | 245 | blocks = parse_cfg('cfg/yolo.cfg') 246 | if len(sys.argv) == 2: 247 | blocks = parse_cfg(sys.argv[1]) 248 | print_cfg(blocks) 249 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/coco.data: -------------------------------------------------------------------------------- 1 | train = coco_train.txt 2 | valid = coco_test.txt 3 | names = data/coco.names 4 | backup = backup 5 | gpus = 0,1,2,3 6 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/darknet19_448.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=4 4 | height=448 5 | width=448 6 | max_crop=512 7 | channels=3 8 | momentum=0.9 9 | decay=0.0005 10 | 11 | learning_rate=0.001 12 | policy=poly 13 | power=4 14 | max_batches=100000 15 | 16 | angle=7 17 | hue = .1 18 | saturation=.75 19 | exposure=.75 20 | aspect=.75 21 | 22 | [convolutional] 23 | batch_normalize=1 24 | filters=32 25 | size=3 26 | stride=1 27 | pad=1 28 | activation=leaky 29 | 30 | [maxpool] 31 | size=2 32 | stride=2 33 | 34 | [convolutional] 35 | batch_normalize=1 36 | filters=64 37 | size=3 38 | stride=1 39 | pad=1 40 | activation=leaky 41 | 42 | [maxpool] 43 | size=2 44 | stride=2 45 | 46 | [convolutional] 47 | batch_normalize=1 48 | filters=128 49 | size=3 50 | stride=1 51 | pad=1 52 | activation=leaky 53 | 54 | [convolutional] 55 | batch_normalize=1 56 | filters=64 57 | size=1 58 | stride=1 59 | pad=1 60 | activation=leaky 61 | 62 | [convolutional] 63 | batch_normalize=1 64 | filters=128 65 | size=3 66 | stride=1 67 | pad=1 68 | 
activation=leaky 69 | 70 | [maxpool] 71 | size=2 72 | stride=2 73 | 74 | [convolutional] 75 | batch_normalize=1 76 | filters=256 77 | size=3 78 | stride=1 79 | pad=1 80 | activation=leaky 81 | 82 | [convolutional] 83 | batch_normalize=1 84 | filters=128 85 | size=1 86 | stride=1 87 | pad=1 88 | activation=leaky 89 | 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | filters=512 105 | size=3 106 | stride=1 107 | pad=1 108 | activation=leaky 109 | 110 | [convolutional] 111 | batch_normalize=1 112 | filters=256 113 | size=1 114 | stride=1 115 | pad=1 116 | activation=leaky 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=512 121 | size=3 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [convolutional] 127 | batch_normalize=1 128 | filters=256 129 | size=1 130 | stride=1 131 | pad=1 132 | activation=leaky 133 | 134 | [convolutional] 135 | batch_normalize=1 136 | filters=512 137 | size=3 138 | stride=1 139 | pad=1 140 | activation=leaky 141 | 142 | [maxpool] 143 | size=2 144 | stride=2 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=1024 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=512 157 | size=1 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [convolutional] 163 | batch_normalize=1 164 | filters=1024 165 | size=3 166 | stride=1 167 | pad=1 168 | activation=leaky 169 | 170 | [convolutional] 171 | batch_normalize=1 172 | filters=512 173 | size=1 174 | stride=1 175 | pad=1 176 | activation=leaky 177 | 178 | [convolutional] 179 | batch_normalize=1 180 | filters=1024 181 | size=3 182 | stride=1 183 | pad=1 184 | activation=leaky 185 | 186 | [convolutional] 187 | filters=1000 188 | size=1 189 | stride=1 190 | pad=1 191 | activation=linear 192 | 193 | [avgpool] 194 | 195 | [softmax] 196 | groups=1 197 | 198 | [cost] 199 | type=sse 200 | 201 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/tiny-yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=8 4 | width=416 5 | height=416 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | angle=0 10 | saturation = 1.5 11 | exposure = 1.5 12 | hue=.1 13 | 14 | learning_rate=0.001 15 | max_batches = 40200 16 | policy=steps 17 | steps=-1,100,20000,30000 18 | scales=.1,10,.1,.1 19 | 20 | [convolutional] 21 | batch_normalize=1 22 | filters=16 23 | size=3 24 | stride=1 25 | pad=1 26 | activation=leaky 27 | 28 | [maxpool] 29 | size=2 30 | stride=2 31 | 32 | [convolutional] 33 | batch_normalize=1 34 | filters=32 35 | size=3 36 | stride=1 37 | pad=1 38 | activation=leaky 39 | 40 | [maxpool] 41 | size=2 42 | stride=2 43 | 44 | [convolutional] 45 | batch_normalize=1 46 | filters=64 47 | size=3 48 | stride=1 49 | pad=1 50 | activation=leaky 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=128 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [maxpool] 65 | size=2 66 | stride=2 67 | 68 | [convolutional] 69 | batch_normalize=1 70 | filters=256 71 | size=3 72 | stride=1 73 | pad=1 74 | activation=leaky 75 | 76 | [maxpool] 77 | size=2 78 | stride=2 79 | 80 | [convolutional] 81 | batch_normalize=1 82 | filters=512 83 | size=3 84 | stride=1 85 
| pad=1 86 | activation=leaky 87 | 88 | [maxpool] 89 | size=2 90 | stride=1 91 | 92 | [convolutional] 93 | batch_normalize=1 94 | filters=1024 95 | size=3 96 | stride=1 97 | pad=1 98 | activation=leaky 99 | 100 | ########### 101 | 102 | [convolutional] 103 | batch_normalize=1 104 | size=3 105 | stride=1 106 | pad=1 107 | filters=1024 108 | activation=leaky 109 | 110 | [convolutional] 111 | size=1 112 | stride=1 113 | pad=1 114 | filters=125 115 | activation=linear 116 | 117 | [region] 118 | anchors = 1.08,1.19, 3.42,4.41, 6.63,11.38, 9.42,5.11, 16.62,10.52 119 | bias_match=1 120 | classes=20 121 | coords=4 122 | num=5 123 | softmax=1 124 | jitter=.2 125 | rescore=1 126 | 127 | object_scale=5 128 | noobject_scale=1 129 | class_scale=1 130 | coord_scale=1 131 | 132 | absolute=1 133 | thresh = .6 134 | random=1 135 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/tiny-yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Training 3 | # batch=64 4 | # subdivisions=2 5 | # Testing 6 | batch=1 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | size=3 110 | stride=1 111 | pad=1 112 | filters=512 113 | activation=leaky 114 | 115 | [convolutional] 116 | size=1 117 | stride=1 118 | pad=1 119 | filters=425 120 | activation=linear 121 | 122 | [region] 123 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 124 | bias_match=1 125 | classes=80 126 | coords=4 127 | num=5 128 | softmax=1 129 | jitter=.2 130 | rescore=0 131 | 132 | object_scale=5 133 | noobject_scale=1 134 | class_scale=1 135 | coord_scale=1 136 | 137 | absolute=1 138 | thresh = .6 139 | random=1 140 | 141 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/voc.data: -------------------------------------------------------------------------------- 1 | train = data/voc_train.txt 2 | valid = data/2007_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | 
gpus = 3 6 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/voc.names: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/voc_gaotie.data: -------------------------------------------------------------------------------- 1 | train = data/gaotie_trainval.txt 2 | valid = data/gaotie_test.txt 3 | names = data/voc.names 4 | backup = backup 5 | gpus = 3 -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/yolo-voc.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=64 4 | subdivisions=8 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | height=416 9 | width=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 80200 21 | policy=steps 22 | steps=-1,500,40000,60000 23 | scales=0.1,10,.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | 
activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=125 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071 243 | bias_match=1 244 | classes=20 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/yolo.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=8 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=64 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=128 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [convolutional] 58 | batch_normalize=1 59 | filters=64 60 | size=1 61 | stride=1 62 | pad=1 63 | activation=leaky 64 | 65 | [convolutional] 66 | batch_normalize=1 67 | filters=128 68 | size=3 69 | stride=1 70 | pad=1 71 | activation=leaky 72 | 73 | [maxpool] 74 | size=2 75 | stride=2 76 | 77 | [convolutional] 78 | batch_normalize=1 79 | filters=256 80 | size=3 81 | stride=1 82 | pad=1 83 | activation=leaky 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=128 88 | size=1 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [convolutional] 94 | batch_normalize=1 95 | filters=256 96 | size=3 97 | stride=1 98 | pad=1 99 | activation=leaky 100 | 101 | [maxpool] 102 | size=2 103 | stride=2 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=512 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [convolutional] 114 | batch_normalize=1 
115 | filters=256 116 | size=1 117 | stride=1 118 | pad=1 119 | activation=leaky 120 | 121 | [convolutional] 122 | batch_normalize=1 123 | filters=512 124 | size=3 125 | stride=1 126 | pad=1 127 | activation=leaky 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | [convolutional] 150 | batch_normalize=1 151 | filters=1024 152 | size=3 153 | stride=1 154 | pad=1 155 | activation=leaky 156 | 157 | [convolutional] 158 | batch_normalize=1 159 | filters=512 160 | size=1 161 | stride=1 162 | pad=1 163 | activation=leaky 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=1024 168 | size=3 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [convolutional] 174 | batch_normalize=1 175 | filters=512 176 | size=1 177 | stride=1 178 | pad=1 179 | activation=leaky 180 | 181 | [convolutional] 182 | batch_normalize=1 183 | filters=1024 184 | size=3 185 | stride=1 186 | pad=1 187 | activation=leaky 188 | 189 | 190 | ####### 191 | 192 | [convolutional] 193 | batch_normalize=1 194 | size=3 195 | stride=1 196 | pad=1 197 | filters=1024 198 | activation=leaky 199 | 200 | [convolutional] 201 | batch_normalize=1 202 | size=3 203 | stride=1 204 | pad=1 205 | filters=1024 206 | activation=leaky 207 | 208 | [route] 209 | layers=-9 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | size=1 214 | stride=1 215 | pad=1 216 | filters=64 217 | activation=leaky 218 | 219 | [reorg] 220 | stride=2 221 | 222 | [route] 223 | layers=-1,-4 224 | 225 | [convolutional] 226 | batch_normalize=1 227 | size=3 228 | stride=1 229 | pad=1 230 | filters=1024 231 | activation=leaky 232 | 233 | [convolutional] 234 | size=1 235 | stride=1 236 | pad=1 237 | filters=425 238 | activation=linear 239 | 240 | 241 | [region] 242 | anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 243 | bias_match=1 244 | classes=80 245 | coords=4 246 | num=5 247 | softmax=1 248 | jitter=.3 249 | rescore=1 250 | 251 | object_scale=5 252 | noobject_scale=1 253 | class_scale=1 254 | coord_scale=1 255 | 256 | absolute=1 257 | thresh = .6 258 | random=1 259 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | 
size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/demo/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/detector/YOLOv3/demo/004545.jpg -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/demo/results/004545.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/detector/YOLOv3/demo/results/004545.jpg -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/detect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | from PIL import Image, ImageDraw 4 | #from models.tiny_yolo import TinyYoloNet 5 | from yolo_utils import * 6 | from darknet import Darknet 7 | 8 | import cv2 9 | 10 | namesfile=None 11 | def detect(cfgfile, weightfile, imgfolder): 12 | m = Darknet(cfgfile) 13 | 14 | #m.print_network() 15 | m.load_weights(weightfile) 16 | print('Loading weights from %s... Done!' 
% (weightfile)) 17 | 18 | # if m.num_classes == 20: 19 | # namesfile = 'data/voc.names' 20 | # elif m.num_classes == 80: 21 | # namesfile = 'data/coco.names' 22 | # else: 23 | # namesfile = 'data/names' 24 | 25 | use_cuda = True 26 | if use_cuda: 27 | m.cuda() 28 | 29 | imgfiles = [x for x in os.listdir(imgfolder) if x[-4:] == '.jpg'] 30 | imgfiles.sort() 31 | for imgname in imgfiles: 32 | imgfile = os.path.join(imgfolder,imgname) 33 | 34 | img = Image.open(imgfile).convert('RGB') 35 | sized = img.resize((m.width, m.height)) 36 | 37 | #for i in range(2): 38 | start = time.time() 39 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 40 | finish = time.time() 41 | #if i == 1: 42 | print('%s: Predicted in %f seconds.' % (imgfile, (finish-start))) 43 | 44 | class_names = load_class_names(namesfile) 45 | img = plot_boxes(img, boxes, 'result/{}'.format(os.path.basename(imgfile)), class_names) 46 | img = np.array(img) 47 | cv2.imshow('{}'.format(os.path.basename(imgfolder)), img) 48 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000,800) 49 | cv2.waitKey(1000) 50 | 51 | def detect_cv2(cfgfile, weightfile, imgfile): 52 | import cv2 53 | m = Darknet(cfgfile) 54 | 55 | m.print_network() 56 | m.load_weights(weightfile) 57 | print('Loading weights from %s... Done!' % (weightfile)) 58 | 59 | if m.num_classes == 20: 60 | namesfile = 'data/voc.names' 61 | elif m.num_classes == 80: 62 | namesfile = 'data/coco.names' 63 | else: 64 | namesfile = 'data/names' 65 | 66 | use_cuda = True 67 | if use_cuda: 68 | m.cuda() 69 | 70 | img = cv2.imread(imgfile) 71 | sized = cv2.resize(img, (m.width, m.height)) 72 | sized = cv2.cvtColor(sized, cv2.COLOR_BGR2RGB) 73 | 74 | for i in range(2): 75 | start = time.time() 76 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 77 | finish = time.time() 78 | if i == 1: 79 | print('%s: Predicted in %f seconds.' % (imgfile, (finish-start))) 80 | 81 | class_names = load_class_names(namesfile) 82 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 83 | 84 | def detect_skimage(cfgfile, weightfile, imgfile): 85 | from skimage import io 86 | from skimage.transform import resize 87 | m = Darknet(cfgfile) 88 | 89 | m.print_network() 90 | m.load_weights(weightfile) 91 | print('Loading weights from %s... Done!' % (weightfile)) 92 | 93 | if m.num_classes == 20: 94 | namesfile = 'data/voc.names' 95 | elif m.num_classes == 80: 96 | namesfile = 'data/coco.names' 97 | else: 98 | namesfile = 'data/names' 99 | 100 | use_cuda = True 101 | if use_cuda: 102 | m.cuda() 103 | 104 | img = io.imread(imgfile) 105 | sized = resize(img, (m.width, m.height)) * 255 106 | 107 | for i in range(2): 108 | start = time.time() 109 | boxes = do_detect(m, sized, 0.5, 0.4, use_cuda) 110 | finish = time.time() 111 | if i == 1: 112 | print('%s: Predicted in %f seconds.' 
% (imgfile, (finish-start))) 113 | 114 | class_names = load_class_names(namesfile) 115 | plot_boxes_cv2(img, boxes, savename='predictions.jpg', class_names=class_names) 116 | 117 | if __name__ == '__main__': 118 | if len(sys.argv) == 5: 119 | cfgfile = sys.argv[1] 120 | weightfile = sys.argv[2] 121 | imgfolder = sys.argv[3] 122 | cv2.namedWindow('{}'.format(os.path.basename(imgfolder)), cv2.WINDOW_NORMAL ) 123 | cv2.resizeWindow('{}'.format(os.path.basename(imgfolder)), 1000,800) 124 | globals()["namesfile"] = sys.argv[4] 125 | detect(cfgfile, weightfile, imgfolder) 126 | #detect_cv2(cfgfile, weightfile, imgfile) 127 | #detect_skimage(cfgfile, weightfile, imgfile) 128 | else: 129 | print('Usage: ') 130 | print(' python detect.py cfgfile weightfile imgfolder names') 131 | #detect('cfg/tiny-yolo-voc.cfg', 'tiny-yolo-voc.weights', 'data/person.jpg', version=1) 132 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms import boxes_nms -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/build.sh: -------------------------------------------------------------------------------- 1 | cd ext 2 | 3 | python build.py build_ext develop 4 | 5 | cd .. 6 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/detector/YOLOv3/nms/ext/__init__.py -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/build.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | 4 | import torch 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import CUDA_HOME 7 | from torch.utils.cpp_extension import CppExtension 8 | from torch.utils.cpp_extension import CUDAExtension 9 | 10 | requirements = ["torch"] 11 | 12 | 13 | def get_extensions(): 14 | extensions_dir = os.path.dirname(os.path.abspath(__file__)) 15 | 16 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 17 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 18 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 19 | 20 | sources = main_file + source_cpu 21 | extension = CppExtension 22 | 23 | extra_compile_args = {"cxx": []} 24 | define_macros = [] 25 | 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | extension = CUDAExtension 28 | sources += source_cuda 29 | define_macros += [("WITH_CUDA", None)] 30 | extra_compile_args["nvcc"] = [ 31 | "-DCUDA_HAS_FP16=1", 32 | "-D__CUDA_NO_HALF_OPERATORS__", 33 | "-D__CUDA_NO_HALF_CONVERSIONS__", 34 | "-D__CUDA_NO_HALF2_OPERATORS__", 35 | ] 36 | 37 | sources = [os.path.join(extensions_dir, s) for s in sources] 38 | 39 | include_dirs = [extensions_dir] 40 | 41 | ext_modules = [ 42 | extension( 43 | "torch_extension", 44 | sources, 45 | include_dirs=include_dirs, 46 | define_macros=define_macros, 47 | extra_compile_args=extra_compile_args, 48 | ) 49 | ] 50 | 51 | return ext_modules 52 | 53 | 54 | setup( 55 | name="torch_extension", 56 | version="0.1", 57 | ext_modules=get_extensions(), 58 | cmdclass={"build_ext": 
torch.utils.cpp_extension.BuildExtension}) 59 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t) * (y2_t - y1_t); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1); 57 | auto h = std::max(static_cast(0), yy2 - yy1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | at::Tensor nms_cpu(const at::Tensor& dets, 6 | const at::Tensor& scores, 7 | const float threshold); 8 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
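// Overview of the file below: a bitmask-based parallel NMS. Boxes are sorted by score on the
// host; each CUDA block compares a 64-box "row" chunk against a 64-box "column" chunk
// (threadsPerBlock is 64, the bit width of unsigned long long) and records, per row box, one
// bit for every column box it suppresses (IoU > nms_overlap_thresh) in dev_mask. The host then
// scans the sorted boxes once, keeping a box only if no previously kept box has its bit set.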
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left, 0.f), height = max(bottom - top, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0]) * (a[3] - a[1]); 19 | float Sb = (b[2] - b[0]) * (b[3] - b[1]); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * 
col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 6 | 7 | 8 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/torch_extension.cp37-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/detector/YOLOv3/nms/ext/torch_extension.cp37-win_amd64.pyd -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/ext/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
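// This translation unit is what build.py compiles into the "torch_extension" module: it binds
// the nms() dispatcher declared in nms.h (CUDA path when compiled WITH_CUDA, CPU otherwise),
// which nms/nms.py then imports and calls as torch_extension.nms(boxes, scores, threshold).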
2 | #include "nms.h" 3 | 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("nms", &nms, "non-maximum suppression"); 7 | } 8 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/nms.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | import torchvision 3 | 4 | try: 5 | import torch 6 | import torch_extension 7 | 8 | _nms = torch_extension.nms 9 | except ImportError: 10 | if torchvision.__version__ >= '0.3.0': 11 | _nms = torchvision.ops.nms 12 | else: 13 | from .python_nms import python_nms 14 | 15 | _nms = python_nms 16 | warnings.warn('You are using python version NMS, which is very very slow. Try compile c++ NMS ' 17 | 'using `cd ext & python build.py build_ext develop`') 18 | 19 | 20 | def boxes_nms(boxes, scores, nms_thresh, max_count=-1): 21 | """ Performs non-maximum suppression, run on GPU or CPU according to 22 | boxes's device. 23 | Args: 24 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(or relative coordinates), shape is (n, 4) 25 | scores(Tensor): scores, shape is (n, ) 26 | nms_thresh(float): thresh 27 | max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression 28 | Returns: 29 | indices kept. 30 | """ 31 | keep = _nms(boxes, scores, nms_thresh) 32 | if max_count > 0: 33 | keep = keep[:max_count] 34 | return keep 35 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/nms/python_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def python_nms(boxes, scores, nms_thresh): 6 | """ Performs non-maximum suppression using numpy 7 | Args: 8 | boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(not support relative coordinates), 9 | shape is (n, 4) 10 | scores(Tensor): scores, shape is (n, ) 11 | nms_thresh(float): thresh 12 | Returns: 13 | indices kept. 14 | """ 15 | if boxes.numel() == 0: 16 | return torch.empty((0,), dtype=torch.long) 17 | # Use numpy to run nms. Running nms in PyTorch code on CPU is really slow. 
18 | origin_device = boxes.device 19 | cpu_device = torch.device('cpu') 20 | boxes = boxes.to(cpu_device).numpy() 21 | scores = scores.to(cpu_device).numpy() 22 | 23 | x1 = boxes[:, 0] 24 | y1 = boxes[:, 1] 25 | x2 = boxes[:, 2] 26 | y2 = boxes[:, 3] 27 | areas = (x2 - x1) * (y2 - y1) 28 | order = np.argsort(scores)[::-1] 29 | num_detections = boxes.shape[0] 30 | suppressed = np.zeros((num_detections,), dtype=np.bool) 31 | for _i in range(num_detections): 32 | i = order[_i] 33 | if suppressed[i]: 34 | continue 35 | ix1 = x1[i] 36 | iy1 = y1[i] 37 | ix2 = x2[i] 38 | iy2 = y2[i] 39 | iarea = areas[i] 40 | 41 | for _j in range(_i + 1, num_detections): 42 | j = order[_j] 43 | if suppressed[j]: 44 | continue 45 | 46 | xx1 = max(ix1, x1[j]) 47 | yy1 = max(iy1, y1[j]) 48 | xx2 = min(ix2, x2[j]) 49 | yy2 = min(iy2, y2[j]) 50 | w = max(0, xx2 - xx1) 51 | h = max(0, yy2 - yy1) 52 | 53 | inter = w * h 54 | ovr = inter / (iarea + areas[j] - inter) 55 | if ovr >= nms_thresh: 56 | suppressed[j] = True 57 | keep = np.nonzero(suppressed == 0)[0] 58 | keep = torch.from_numpy(keep).to(origin_device) 59 | return keep 60 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/region_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | import torch 5 | import torch.nn as nn 6 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 7 | 8 | 9 | class RegionLayer(nn.Module): 10 | def __init__(self, num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 11 | super(RegionLayer, self).__init__() 12 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 13 | self.device = torch.device("cuda" if use_cuda else "cpu") 14 | self.num_classes = num_classes 15 | self.num_anchors = num_anchors 16 | self.anchor_step = len(anchors) // num_anchors 17 | # self.anchors = torch.stack(torch.FloatTensor(anchors).split(self.anchor_step)).to(self.device) 18 | self.anchors = torch.FloatTensor(anchors).view(self.num_anchors, self.anchor_step).to(self.device) 19 | self.rescore = 1 20 | self.coord_scale = 1 21 | self.noobject_scale = 1 22 | self.object_scale = 5 23 | self.class_scale = 1 24 | self.thresh = 0.6 25 | self.seen = 0 26 | 27 | def build_targets(self, pred_boxes, target, nH, nW): 28 | nB = target.size(0) 29 | nA = self.num_anchors 30 | conf_mask = torch.ones(nB, nA, nH, nW) * self.noobject_scale 31 | coord_mask = torch.zeros(nB, nA, nH, nW) 32 | cls_mask = torch.zeros(nB, nA, nH, nW) 33 | tcoord = torch.zeros(4, nB, nA, nH, nW) 34 | tconf = torch.zeros(nB, nA, nH, nW) 35 | tcls = torch.zeros(nB, nA, nH, nW) 36 | 37 | nAnchors = nA * nH * nW 38 | nPixels = nH * nW 39 | nGT = 0 # number of ground truth 40 | nRecall = 0 41 | # it works faster on CPU than on GPU. 
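# The per-image loops below make two passes: the first marks every predicted box whose IoU with
# some ground-truth box already exceeds self.thresh, zeroing its conf_mask so it is pushed neither
# towards 0 nor towards 1; the second assigns each ground-truth box to the best-matching anchor at
# its grid cell and fills the coord/conf/cls targets and masks for that single location.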
42 | anchors = self.anchors.to("cpu") 43 | 44 | if self.seen < 12800: 45 | tcoord[0].fill_(0.5) 46 | tcoord[1].fill_(0.5) 47 | coord_mask.fill_(1) 48 | 49 | for b in range(nB): 50 | cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() 51 | cur_ious = torch.zeros(nAnchors) 52 | tbox = target[b].view(-1, 5).to("cpu") 53 | for t in range(50): 54 | if tbox[t][1] == 0: 55 | break 56 | gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])] 57 | gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])] 58 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() 59 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 60 | ignore_ix = cur_ious > self.thresh 61 | conf_mask[b][ignore_ix.view(nA, nH, nW)] = 0 62 | 63 | for t in range(50): 64 | if tbox[t][1] == 0: 65 | break 66 | nGT += 1 67 | gx, gw = [i * nW for i in (tbox[t][1], tbox[t][3])] 68 | gy, gh = [i * nH for i in (tbox[t][2], tbox[t][4])] 69 | gw, gh = gw.float(), gh.float() 70 | gi, gj = int(gx), int(gy) 71 | 72 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t() 73 | anchor_boxes = torch.cat((torch.zeros(nA, 2), anchors), 1).t() 74 | tmp_ious = multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False) 75 | best_iou, best_n = torch.max(tmp_ious, 0) 76 | 77 | if self.anchor_step == 4: # this part is not tested. 78 | tmp_ious_mask = (tmp_ious == best_iou) 79 | if tmp_ious_mask.sum() > 0: 80 | gt_pos = torch.FloatTensor([gi, gj, gx, gy]).repeat(nA, 1).t() 81 | an_pos = anchor_boxes[4:6] # anchor_boxes are consisted of [0 0 aw ah ax ay] 82 | dist = pow(((gt_pos[0] + an_pos[0]) - gt_pos[2]), 2) + pow( 83 | ((gt_pos[1] + an_pos[1]) - gt_pos[3]), 2) 84 | dist[1 - tmp_ious_mask] = 10000 # set the large number for the small ious 85 | _, best_n = torch.min(dist, 0) 86 | 87 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 88 | pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] 89 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 90 | 91 | coord_mask[b][best_n][gj][gi] = 1 92 | cls_mask[b][best_n][gj][gi] = 1 93 | conf_mask[b][best_n][gj][gi] = self.object_scale 94 | tcoord[0][b][best_n][gj][gi] = gx - gi 95 | tcoord[1][b][best_n][gj][gi] = gy - gj 96 | tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0]) 97 | tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1]) 98 | tcls[b][best_n][gj][gi] = tbox[t][0] 99 | tconf[b][best_n][gj][gi] = iou if self.rescore else 1. 
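# rescore is 1 for this layer (set in __init__ and in the [region] block of yolo-voc.cfg), so the
# confidence target above is the matched prediction's IoU rather than a hard 1.0.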
100 | if iou > 0.5: 101 | nRecall += 1 102 | 103 | return nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 104 | 105 | def get_mask_boxes(self, output): 106 | if not isinstance(self.anchors, torch.Tensor): 107 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 108 | masked_anchors = self.anchors.view(-1) 109 | num_anchors = torch.IntTensor([self.num_anchors]).to(self.device) 110 | return {'x': output, 'a': masked_anchors, 'n': num_anchors} 111 | 112 | def forward(self, output, target): 113 | # output : BxAs*(4+1+num_classes)*H*W 114 | t0 = time.time() 115 | nB = output.data.size(0) # batch size 116 | nA = self.num_anchors 117 | nC = self.num_classes 118 | nH = output.data.size(2) 119 | nW = output.data.size(3) 120 | cls_anchor_dim = nB * nA * nH * nW 121 | 122 | if not isinstance(self.anchors, torch.Tensor): 123 | self.anchors = torch.FloatTensor(self.anchors).view(self.num_anchors, self.anchor_step).to(self.device) 124 | 125 | output = output.view(nB, nA, (5 + nC), nH, nW) 126 | cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device) 127 | ix = torch.LongTensor(range(0, 5)).to(self.device) 128 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 129 | 130 | coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, 131 | cls_anchor_dim) # x, y, w, h 132 | coord[0:2] = coord[0:2].sigmoid() # x, y 133 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 134 | cls = output.index_select(2, cls_grid) 135 | cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC) 136 | 137 | t1 = time.time() 138 | grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device) 139 | grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to( 140 | self.device) 141 | anchor_w = self.anchors.index_select(1, ix[0]).repeat(1, nB * nH * nW).view(cls_anchor_dim) 142 | anchor_h = self.anchors.index_select(1, ix[1]).repeat(1, nB * nH * nW).view(cls_anchor_dim) 143 | 144 | pred_boxes[0] = coord[0] + grid_x 145 | pred_boxes[1] = coord[1] + grid_y 146 | pred_boxes[2] = coord[2].exp() * anchor_w 147 | pred_boxes[3] = coord[3].exp() * anchor_h 148 | # for build_targets. it works faster on CPU than on GPU 149 | pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach() 150 | 151 | t2 = time.time() 152 | nGT, nRecall, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 153 | self.build_targets(pred_boxes, target.detach(), nH, nW) 154 | 155 | cls_mask = (cls_mask == 1) 156 | tcls = tcls[cls_mask].long().view(-1) 157 | cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device) 158 | cls = cls[cls_mask].view(-1, nC) 159 | 160 | nProposals = int((conf > 0.25).sum()) 161 | 162 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 163 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 164 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.sqrt().to(self.device) 165 | 166 | t3 = time.time() 167 | loss_coord = self.coord_scale * nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2 168 | # sqrt(object_scale)/2 is almost equal to 1. 
169 | loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2 170 | loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 171 | loss = loss_coord + loss_conf + loss_cls 172 | t4 = time.time() 173 | if False: 174 | print('-' * 30) 175 | print(' activation : %f' % (t1 - t0)) 176 | print(' create pred_boxes : %f' % (t2 - t1)) 177 | print(' build targets : %f' % (t3 - t2)) 178 | print(' create loss : %f' % (t4 - t3)) 179 | print(' total : %f' % (t4 - t0)) 180 | print('%d: nGT %3d, nRC %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 181 | % (self.seen, nGT, nRecall, nProposals, loss_coord, loss_conf, loss_cls, loss)) 182 | if math.isnan(loss.item()): 183 | print(conf, tconf) 184 | sys.exit(0) 185 | return loss 186 | -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/weight/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/detector/YOLOv3/weight/.gitkeep -------------------------------------------------------------------------------- /deepsort/detector/YOLOv3/yolo_layer.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | import torch 5 | import torch.nn as nn 6 | from .yolo_utils import bbox_iou, multi_bbox_ious, convert2cpu 7 | 8 | 9 | class YoloLayer(nn.Module): 10 | def __init__(self, anchor_mask=[], num_classes=0, anchors=[], num_anchors=1, use_cuda=None): 11 | super(YoloLayer, self).__init__() 12 | use_cuda = torch.cuda.is_available() and (True if use_cuda is None else use_cuda) 13 | self.device = torch.device("cuda" if use_cuda else "cpu") 14 | 15 | self.anchor_mask = anchor_mask 16 | self.num_classes = num_classes 17 | self.anchors = anchors 18 | self.num_anchors = num_anchors 19 | self.anchor_step = len(anchors) // num_anchors 20 | self.rescore = 0 21 | self.ignore_thresh = 0.5 22 | self.truth_thresh = 1. 23 | self.stride = 32 24 | self.nth_layer = 0 25 | self.seen = 0 26 | self.net_width = 0 27 | self.net_height = 0 28 | 29 | def get_mask_boxes(self, output): 30 | masked_anchors = [] 31 | for m in self.anchor_mask: 32 | masked_anchors += self.anchors[m * self.anchor_step:(m + 1) * self.anchor_step] 33 | masked_anchors = [anchor / self.stride for anchor in masked_anchors] 34 | 35 | masked_anchors = torch.FloatTensor(masked_anchors).to(self.device) 36 | num_anchors = torch.IntTensor([len(self.anchor_mask)]).to(self.device) 37 | return {'x': output, 'a': masked_anchors, 'n': num_anchors} 38 | 39 | def build_targets(self, pred_boxes, target, anchors, nA, nH, nW): 40 | nB = target.size(0) 41 | anchor_step = anchors.size(1) # anchors[nA][anchor_step] 42 | conf_mask = torch.ones(nB, nA, nH, nW) 43 | coord_mask = torch.zeros(nB, nA, nH, nW) 44 | cls_mask = torch.zeros(nB, nA, nH, nW) 45 | tcoord = torch.zeros(4, nB, nA, nH, nW) 46 | tconf = torch.zeros(nB, nA, nH, nW) 47 | tcls = torch.zeros(nB, nA, nH, nW) 48 | twidth, theight = self.net_width / self.stride, self.net_height / self.stride 49 | 50 | nAnchors = nA * nH * nW 51 | nPixels = nH * nW 52 | nGT = 0 53 | nRecall = 0 54 | nRecall75 = 0 55 | 56 | # it works faster on CPU than on GPU. 
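# Unlike RegionLayer, the anchors passed to this method were already divided by the stride in
# get_mask_boxes, so ground-truth widths/heights are scaled by twidth/theight
# (net_width/stride, net_height/stride) rather than directly by nW and nH.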
57 | anchors = anchors.to("cpu") 58 | 59 | for b in range(nB): 60 | cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() 61 | cur_ious = torch.zeros(nAnchors) 62 | tbox = target[b].view(-1, 5).to("cpu") 63 | for t in range(50): 64 | if tbox[t][1] == 0: 65 | break 66 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 67 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 68 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() 69 | cur_ious = torch.max(cur_ious, multi_bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 70 | ignore_ix = cur_ious > self.ignore_thresh 71 | conf_mask[b][ignore_ix.view(nA, nH, nW)] = 0 72 | 73 | for t in range(50): 74 | if tbox[t][1] == 0: 75 | break 76 | nGT += 1 77 | gx, gy = tbox[t][1] * nW, tbox[t][2] * nH 78 | gw, gh = tbox[t][3] * twidth, tbox[t][4] * theight 79 | gw, gh = gw.float(), gh.float() 80 | gi, gj = int(gx), int(gy) 81 | 82 | tmp_gt_boxes = torch.FloatTensor([0, 0, gw, gh]).repeat(nA, 1).t() 83 | anchor_boxes = torch.cat((torch.zeros(nA, anchor_step), anchors), 1).t() 84 | _, best_n = torch.max(multi_bbox_ious(tmp_gt_boxes, anchor_boxes, x1y1x2y2=False), 0) 85 | 86 | gt_box = torch.FloatTensor([gx, gy, gw, gh]) 87 | pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] 88 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) 89 | 90 | coord_mask[b][best_n][gj][gi] = 1 91 | cls_mask[b][best_n][gj][gi] = 1 92 | conf_mask[b][best_n][gj][gi] = 1 93 | tcoord[0][b][best_n][gj][gi] = gx - gi 94 | tcoord[1][b][best_n][gj][gi] = gy - gj 95 | tcoord[2][b][best_n][gj][gi] = math.log(gw / anchors[best_n][0]) 96 | tcoord[3][b][best_n][gj][gi] = math.log(gh / anchors[best_n][1]) 97 | tcls[b][best_n][gj][gi] = tbox[t][0] 98 | tconf[b][best_n][gj][gi] = iou if self.rescore else 1. 
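# rescore defaults to 0 in __init__ and the bundled [yolo] cfg blocks do not override it, so the
# confidence target above is effectively a hard 1.0 for matched anchors.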
99 | 100 | if iou > 0.5: 101 | nRecall += 1 102 | if iou > 0.75: 103 | nRecall75 += 1 104 | 105 | return nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls 106 | 107 | def forward(self, output, target): 108 | # output : BxAs*(4+1+num_classes)*H*W 109 | mask_tuple = self.get_mask_boxes(output) 110 | t0 = time.time() 111 | nB = output.data.size(0) # batch size 112 | nA = mask_tuple['n'].item() # num_anchors 113 | nC = self.num_classes 114 | nH = output.data.size(2) 115 | nW = output.data.size(3) 116 | anchor_step = mask_tuple['a'].size(0) // nA 117 | anchors = mask_tuple['a'].view(nA, anchor_step).to(self.device) 118 | cls_anchor_dim = nB * nA * nH * nW 119 | 120 | output = output.view(nB, nA, (5 + nC), nH, nW) 121 | cls_grid = torch.linspace(5, 5 + nC - 1, nC).long().to(self.device) 122 | ix = torch.LongTensor(range(0, 5)).to(self.device) 123 | pred_boxes = torch.FloatTensor(4, cls_anchor_dim).to(self.device) 124 | 125 | coord = output.index_select(2, ix[0:4]).view(nB * nA, -1, nH * nW).transpose(0, 1).contiguous().view(-1, 126 | cls_anchor_dim) # x, y, w, h 127 | coord[0:2] = coord[0:2].sigmoid() # x, y 128 | conf = output.index_select(2, ix[4]).view(nB, nA, nH, nW).sigmoid() 129 | cls = output.index_select(2, cls_grid) 130 | cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(cls_anchor_dim, nC) 131 | 132 | t1 = time.time() 133 | grid_x = torch.linspace(0, nW - 1, nW).repeat(nB * nA, nH, 1).view(cls_anchor_dim).to(self.device) 134 | grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(cls_anchor_dim).to( 135 | self.device) 136 | anchor_w = anchors.index_select(1, ix[0]).repeat(1, nB * nH * nW).view(cls_anchor_dim) 137 | anchor_h = anchors.index_select(1, ix[1]).repeat(1, nB * nH * nW).view(cls_anchor_dim) 138 | 139 | pred_boxes[0] = coord[0] + grid_x 140 | pred_boxes[1] = coord[1] + grid_y 141 | pred_boxes[2] = coord[2].exp() * anchor_w 142 | pred_boxes[3] = coord[3].exp() * anchor_h 143 | # for build_targets. 
it works faster on CPU than on GPU 144 | pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)).detach() 145 | 146 | t2 = time.time() 147 | nGT, nRecall, nRecall75, coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = \ 148 | self.build_targets(pred_boxes, target.detach(), anchors.detach(), nA, nH, nW) 149 | 150 | cls_mask = (cls_mask == 1) 151 | tcls = tcls[cls_mask].long().view(-1) 152 | cls_mask = cls_mask.view(-1, 1).repeat(1, nC).to(self.device) 153 | cls = cls[cls_mask].view(-1, nC) 154 | 155 | nProposals = int((conf > 0.25).sum()) 156 | 157 | tcoord = tcoord.view(4, cls_anchor_dim).to(self.device) 158 | tconf, tcls = tconf.to(self.device), tcls.to(self.device) 159 | coord_mask, conf_mask = coord_mask.view(cls_anchor_dim).to(self.device), conf_mask.to(self.device) 160 | 161 | t3 = time.time() 162 | loss_coord = nn.MSELoss(size_average=False)(coord * coord_mask, tcoord * coord_mask) / 2 163 | loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) 164 | loss_cls = nn.CrossEntropyLoss(size_average=False)(cls, tcls) if cls.size(0) > 0 else 0 165 | loss = loss_coord + loss_conf + loss_cls 166 | 167 | t4 = time.time() 168 | if False: 169 | print('-' * 30) 170 | print(' activation : %f' % (t1 - t0)) 171 | print(' create pred_boxes : %f' % (t2 - t1)) 172 | print(' build targets : %f' % (t3 - t2)) 173 | print(' create loss : %f' % (t4 - t3)) 174 | print(' total : %f' % (t4 - t0)) 175 | print( 176 | '%d: Layer(%03d) nGT %3d, nRC %3d, nRC75 %3d, nPP %3d, loss: box %6.3f, conf %6.3f, class %6.3f, total %7.3f' 177 | % (self.seen, self.nth_layer, nGT, nRecall, nRecall75, nProposals, loss_coord, loss_conf, loss_cls, loss)) 178 | if math.isnan(loss.item()): 179 | print(conf, tconf) 180 | sys.exit(0) 181 | return loss 182 | -------------------------------------------------------------------------------- /deepsort/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .YOLOv3 import YOLOv3 2 | 3 | 4 | __all__ = ['build_detector'] 5 | 6 | def build_detector(use_cuda): 7 | return YOLOv3('./deepsort/detector/YOLOv3/cfg/yolo_v3.cfg', './deepsort/detector/YOLOv3/weight/yolov3.weights','./deepsort/detector/YOLOv3/cfg/coco.names', 8 | score_thresh=0.5, nms_thresh=0.4, 9 | is_xywh=True, use_cuda=use_cuda) 10 | -------------------------------------------------------------------------------- /deepsort/ped_det_server.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module gets video in input and outputs the 3 | json file with coordination of bboxes in the video. 
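Example invocation, using the flags and defaults defined in parse_args below (adjust the
paths to your own video and output directory):

    python ped_det_server.py --VIDEO_PATH ./demo/ped.avi --save_path ./output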
4 | 5 | """ 6 | from os.path import basename, splitext, join, isfile, isdir, dirname 7 | from os import makedirs 8 | 9 | from tqdm import tqdm 10 | import cv2 11 | import argparse 12 | import torch 13 | 14 | from detector import build_detector 15 | from deep_sort import build_tracker 16 | from utils.tools import tik_tok, is_video 17 | from utils.draw import compute_color_for_labels 18 | from utils.parser import get_config 19 | from utils.json_logger import BboxToJsonLogger 20 | import warnings 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("--VIDEO_PATH", type=str, default="./demo/ped.avi") 26 | parser.add_argument("--config_detection", type=str, default="./configs/yolov3.yaml") 27 | parser.add_argument("--config_deepsort", type=str, default="./configs/deep_sort.yaml") 28 | parser.add_argument("--write-fps", type=int, default=20) 29 | parser.add_argument("--frame_interval", type=int, default=1) 30 | parser.add_argument("--save_path", type=str, default="./output") 31 | parser.add_argument("--cpu", dest="use_cuda", action="store_false", default=True) 32 | args = parser.parse_args() 33 | 34 | assert isfile(args.VIDEO_PATH), "Error: Video not found" 35 | assert is_video(args.VIDEO_PATH), "Error: Not Supported format" 36 | if args.frame_interval < 1: args.frame_interval = 1 37 | 38 | return args 39 | 40 | 41 | class VideoTracker(object): 42 | def __init__(self, cfg, args): 43 | self.cfg = cfg 44 | self.args = args 45 | use_cuda = args.use_cuda and torch.cuda.is_available() 46 | if not use_cuda: 47 | warnings.warn("Running in cpu mode!") 48 | 49 | self.vdo = cv2.VideoCapture() 50 | self.detector = build_detector(cfg, use_cuda=use_cuda) 51 | self.deepsort = build_tracker(cfg, use_cuda=use_cuda) 52 | self.class_names = self.detector.class_names 53 | 54 | # Configure output video and json 55 | self.logger = BboxToJsonLogger() 56 | filename, extension = splitext(basename(self.args.VIDEO_PATH)) 57 | self.output_file = join(self.args.save_path, f'{filename}.avi') 58 | self.json_output = join(self.args.save_path, f'{filename}.json') 59 | if not isdir(dirname(self.json_output)): 60 | makedirs(dirname(self.json_output)) 61 | 62 | def __enter__(self): 63 | self.vdo.open(self.args.VIDEO_PATH) 64 | self.total_frames = int(cv2.VideoCapture.get(self.vdo, cv2.CAP_PROP_FRAME_COUNT)) 65 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 66 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 67 | 68 | video_details = {'frame_width': self.im_width, 69 | 'frame_height': self.im_height, 70 | 'frame_rate': self.args.write_fps, 71 | 'video_name': self.args.VIDEO_PATH} 72 | codec = cv2.VideoWriter_fourcc(*'XVID') 73 | self.writer = cv2.VideoWriter(self.output_file, codec, self.args.write_fps, 74 | (self.im_width, self.im_height)) 75 | self.logger.add_video_details(**video_details) 76 | 77 | assert self.vdo.isOpened() 78 | return self 79 | 80 | def __exit__(self, exc_type, exc_value, exc_traceback): 81 | if exc_type: 82 | print(exc_type, exc_value, exc_traceback) 83 | 84 | def run(self): 85 | idx_frame = 0 86 | pbar = tqdm(total=self.total_frames + 1) 87 | while self.vdo.grab(): 88 | if idx_frame % args.frame_interval == 0: 89 | _, ori_im = self.vdo.retrieve() 90 | timestamp = self.vdo.get(cv2.CAP_PROP_POS_MSEC) 91 | frame_id = int(self.vdo.get(cv2.CAP_PROP_POS_FRAMES)) 92 | self.logger.add_frame(frame_id=frame_id, timestamp=timestamp) 93 | self.detection(frame=ori_im, frame_id=frame_id) 94 | self.save_frame(ori_im) 95 | idx_frame += 1 96 | 
pbar.update() 97 | self.logger.json_output(self.json_output) 98 | 99 | @tik_tok 100 | def detection(self, frame, frame_id): 101 | im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 102 | # do detection 103 | bbox_xywh, cls_conf, cls_ids = self.detector(im) 104 | if bbox_xywh is not None: 105 | # select person class 106 | mask = cls_ids == 0 107 | 108 | bbox_xywh = bbox_xywh[mask] 109 | bbox_xywh[:, 3:] *= 1.2 # bbox dilation just in case bbox too small 110 | cls_conf = cls_conf[mask] 111 | 112 | # do tracking 113 | outputs = self.deepsort.update(bbox_xywh, cls_conf, im) 114 | 115 | # draw boxes for visualization 116 | if len(outputs) > 0: 117 | frame = self.draw_boxes(img=frame, frame_id=frame_id, output=outputs) 118 | 119 | def draw_boxes(self, img, frame_id, output, offset=(0, 0)): 120 | for i, box in enumerate(output): 121 | x1, y1, x2, y2, identity = [int(ii) for ii in box] 122 | self.logger.add_bbox_to_frame(frame_id=frame_id, 123 | bbox_id=identity, 124 | top=y1, 125 | left=x1, 126 | width=x2 - x1, 127 | height=y2 - y1) 128 | x1 += offset[0] 129 | x2 += offset[0] 130 | y1 += offset[1] 131 | y2 += offset[1] 132 | 133 | # box text and bar 134 | self.logger.add_label_to_bbox(frame_id=frame_id, bbox_id=identity, category='pedestrian', confidence=0.9) 135 | color = compute_color_for_labels(identity) 136 | label = '{}{:d}'.format("", identity) 137 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] 138 | cv2.rectangle(img, (x1, y1), (x2, y2), color, 3) 139 | cv2.rectangle(img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1) 140 | cv2.putText(img, label, (x1, y1 + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) 141 | return img 142 | 143 | def save_frame(self, frame) -> None: 144 | if frame is not None: self.writer.write(frame) 145 | 146 | 147 | if __name__ == "__main__": 148 | args = parse_args() 149 | cfg = get_config() 150 | cfg.merge_from_file(args.config_detection) 151 | cfg.merge_from_file(args.config_deepsort) 152 | 153 | with VideoTracker(cfg, args) as vdo_trk: 154 | vdo_trk.run() 155 | 156 | -------------------------------------------------------------------------------- /deepsort/scripts/yolov3_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection -------------------------------------------------------------------------------- /deepsort/scripts/yolov3_tiny_deepsort.sh: -------------------------------------------------------------------------------- 1 | python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml -------------------------------------------------------------------------------- /deepsort/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/deepsort/utils/__init__.py -------------------------------------------------------------------------------- /deepsort/utils/asserts.py: -------------------------------------------------------------------------------- 1 | from os import environ 2 | 3 | 4 | def assert_in(file, files_to_check): 5 | if file not in files_to_check: 6 | raise AssertionError("{} does not exist in the list".format(str(file))) 7 | return True 8 | 9 | 10 | def assert_in_env(check_list: list): 11 | for item in check_list: 12 | assert_in(item, environ.keys()) 13 | return True 14 | 
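# Illustrative use (the variable name is arbitrary): assert_in_env(['CUDA_VISIBLE_DEVICES'])
# returns True when every listed name is present in os.environ, and raises AssertionError for
# the first one that is missing.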
-------------------------------------------------------------------------------- /deepsort/utils/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 5 | 6 | 7 | def compute_color_for_labels(label): 8 | """ 9 | Simple function that adds fixed color depending on the class 10 | """ 11 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 12 | return tuple(color) 13 | 14 | def draw_boxes(img, bbox, identities=None, offset=(0,0)): 15 | for i,box in enumerate(bbox): 16 | x1,y1,x2,y2 = [int(i) for i in box] 17 | x1 += offset[0] 18 | x2 += offset[0] 19 | y1 += offset[1] 20 | y2 += offset[1] 21 | # box text and bar 22 | id = int(identities[i]) if identities is not None else 0 23 | color = compute_color_for_labels(id) 24 | # label = '{}{:d}'.format("", id) 25 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 26 | cv2.rectangle(img,(x1, y1),(x2,y2),color,10) 27 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 28 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 29 | return img 30 | 31 | 32 | if __name__ == '__main__': 33 | for i in range(82): 34 | print(compute_color_for_labels(i)) 35 | -------------------------------------------------------------------------------- /deepsort/utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | from utils.io import read_results, unzip_objs 7 | 8 | 9 | class Evaluator(object): 10 | 11 | def __init__(self, data_root, seq_name, data_type): 12 | self.data_root = data_root 13 | self.seq_name = seq_name 14 | self.data_type = data_type 15 | 16 | self.load_annotations() 17 | self.reset_accumulator() 18 | 19 | def load_annotations(self): 20 | assert self.data_type == 'mot' 21 | 22 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 23 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 24 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 25 | 26 | def reset_accumulator(self): 27 | self.acc = mm.MOTAccumulator(auto_id=True) 28 | 29 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 30 | # results 31 | trk_tlwhs = np.copy(trk_tlwhs) 32 | trk_ids = np.copy(trk_ids) 33 | 34 | # gts 35 | gt_objs = self.gt_frame_dict.get(frame_id, []) 36 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 37 | 38 | # ignore boxes 39 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 40 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 41 | 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | 57 | # get distance matrix 58 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 59 | 60 | # acc 61 | self.acc.update(gt_ids, trk_ids, 
iou_distance) 62 | 63 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 64 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 65 | else: 66 | events = None 67 | return events 68 | 69 | def eval_file(self, filename): 70 | self.reset_accumulator() 71 | 72 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 73 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 74 | for frame_id in frames: 75 | trk_objs = result_frame_dict.get(frame_id, []) 76 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 77 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 78 | 79 | return self.acc 80 | 81 | @staticmethod 82 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 83 | names = copy.deepcopy(names) 84 | if metrics is None: 85 | metrics = mm.metrics.motchallenge_metrics 86 | metrics = copy.deepcopy(metrics) 87 | 88 | mh = mm.metrics.create() 89 | summary = mh.compute_many( 90 | accs, 91 | metrics=metrics, 92 | names=names, 93 | generate_overall=True 94 | ) 95 | 96 | return summary 97 | 98 | @staticmethod 99 | def save_summary(summary, filename): 100 | import pandas as pd 101 | writer = pd.ExcelWriter(filename) 102 | summary.to_excel(writer) 103 | writer.save() 104 | -------------------------------------------------------------------------------- /deepsort/utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | # from utils.log import get_logger 6 | 7 | 8 | def write_results(filename, results, data_type): 9 | if data_type == 'mot': 10 | save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' 11 | elif data_type == 'kitti': 12 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 13 | else: 14 | raise ValueError(data_type) 15 | 16 | with open(filename, 'w') as f: 17 | for frame_id, tlwhs, track_ids in results: 18 | if data_type == 'kitti': 19 | frame_id -= 1 20 | for tlwh, track_id in zip(tlwhs, track_ids): 21 | if track_id < 0: 22 | continue 23 | x1, y1, w, h = tlwh 24 | x2, y2 = x1 + w, y1 + h 25 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 26 | f.write(line) 27 | 28 | 29 | # def write_results(filename, results_dict: Dict, data_type: str): 30 | # if not filename: 31 | # return 32 | # path = os.path.dirname(filename) 33 | # if not os.path.exists(path): 34 | # os.makedirs(path) 35 | 36 | # if data_type in ('mot', 'mcmot', 'lab'): 37 | # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 38 | # elif data_type == 'kitti': 39 | # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 40 | # else: 41 | # raise ValueError(data_type) 42 | 43 | # with open(filename, 'w') as f: 44 | # for frame_id, frame_data in results_dict.items(): 45 | # if data_type == 'kitti': 46 | # frame_id -= 1 47 | # for tlwh, track_id in frame_data: 48 | # if track_id < 0: 49 | # continue 50 | # x1, y1, w, h = tlwh 51 | # x2, y2 = x1 + w, y1 + h 52 | # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 53 | # f.write(line) 54 | # logger.info('Save results to {}'.format(filename)) 55 | 56 | 57 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 58 | if data_type in ('mot', 'lab'): 59 | 
read_fun = read_mot_results 60 | else: 61 | raise ValueError('Unknown data type: {}'.format(data_type)) 62 | 63 | return read_fun(filename, is_gt, is_ignore) 64 | 65 | 66 | """ 67 | labels={'ped', ... % 1 68 | 'person_on_vhcl', ... % 2 69 | 'car', ... % 3 70 | 'bicycle', ... % 4 71 | 'mbike', ... % 5 72 | 'non_mot_vhcl', ... % 6 73 | 'static_person', ... % 7 74 | 'distractor', ... % 8 75 | 'occluder', ... % 9 76 | 'occluder_on_grnd', ... %10 77 | 'occluder_full', ... % 11 78 | 'reflection', ... % 12 79 | 'crowd' ... % 13 80 | }; 81 | """ 82 | 83 | 84 | def read_mot_results(filename, is_gt, is_ignore): 85 | valid_labels = {1} 86 | ignore_labels = {2, 7, 8, 12} 87 | results_dict = dict() 88 | if os.path.isfile(filename): 89 | with open(filename, 'r') as f: 90 | for line in f.readlines(): 91 | linelist = line.split(',') 92 | if len(linelist) < 7: 93 | continue 94 | fid = int(linelist[0]) 95 | if fid < 1: 96 | continue 97 | results_dict.setdefault(fid, list()) 98 | 99 | if is_gt: 100 | if 'MOT16-' in filename or 'MOT17-' in filename: 101 | label = int(float(linelist[7])) 102 | mark = int(float(linelist[6])) 103 | if mark == 0 or label not in valid_labels: 104 | continue 105 | score = 1 106 | elif is_ignore: 107 | if 'MOT16-' in filename or 'MOT17-' in filename: 108 | label = int(float(linelist[7])) 109 | vis_ratio = float(linelist[8]) 110 | if label not in ignore_labels and vis_ratio >= 0: 111 | continue 112 | else: 113 | continue 114 | score = 1 115 | else: 116 | score = float(linelist[6]) 117 | 118 | tlwh = tuple(map(float, linelist[2:6])) 119 | target_id = int(linelist[1]) 120 | 121 | results_dict[fid].append((tlwh, target_id, score)) 122 | 123 | return results_dict 124 | 125 | 126 | def unzip_objs(objs): 127 | if len(objs) > 0: 128 | tlwhs, ids, scores = zip(*objs) 129 | else: 130 | tlwhs, ids, scores = [], [], [] 131 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 132 | 133 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /deepsort/utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.INFO) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | -------------------------------------------------------------------------------- /deepsort/utils/parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | from easydict import EasyDict as edict 4 | 5 | class YamlParser(edict): 6 | """ 7 | This is yaml parser based on EasyDict. 
8 | """ 9 | def __init__(self, cfg_dict=None, config_file=None): 10 | if cfg_dict is None: 11 | cfg_dict = {} 12 | 13 | if config_file is not None: 14 | assert(os.path.isfile(config_file)) 15 | with open(config_file, 'r') as fo: 16 | cfg_dict.update(yaml.load(fo.read())) 17 | 18 | super(YamlParser, self).__init__(cfg_dict) 19 | 20 | 21 | def merge_from_file(self, config_file): 22 | with open(config_file, 'r') as fo: 23 | self.update(yaml.load(fo.read())) 24 | 25 | 26 | def merge_from_dict(self, config_dict): 27 | self.update(config_dict) 28 | 29 | 30 | def get_config(config_file=None): 31 | return YamlParser(config_file=config_file) 32 | 33 | 34 | if __name__ == "__main__": 35 | cfg = YamlParser(config_file="../configs/yolov3.yaml") 36 | cfg.merge_from_file("../configs/deep_sort.yaml") 37 | 38 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /deepsort/utils/tools.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | from time import time 3 | 4 | 5 | def is_video(ext: str): 6 | """ 7 | Returns true if ext exists in 8 | allowed_exts for video files. 9 | 10 | Args: 11 | ext: 12 | 13 | Returns: 14 | 15 | """ 16 | 17 | allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp') 18 | return any((ext.endswith(x) for x in allowed_exts)) 19 | 20 | 21 | def tik_tok(func): 22 | """ 23 | keep track of time for each process. 24 | Args: 25 | func: 26 | 27 | Returns: 28 | 29 | """ 30 | @wraps(func) 31 | def _time_it(*args, **kwargs): 32 | start = time() 33 | try: 34 | return func(*args, **kwargs) 35 | finally: 36 | end_ = time() 37 | print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start))) 38 | 39 | return _time_it 40 | -------------------------------------------------------------------------------- /demo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/1.png -------------------------------------------------------------------------------- /demo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/2.png -------------------------------------------------------------------------------- /demo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/3.png -------------------------------------------------------------------------------- /demo/dog.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/dog.jpg -------------------------------------------------------------------------------- /demo/mobile.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/mobile.gif -------------------------------------------------------------------------------- /demo/output.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/demo/output.gif -------------------------------------------------------------------------------- /detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | import numpy as np 4 | import cv2 5 | 6 | from deepsort.detector.YOLOv3.darknet import Darknet 7 | from deepsort.detector.YOLOv3.yolo_utils import get_all_boxes, nms, post_process, xywh_to_xyxy, xyxy_to_xywh 8 | from deepsort.detector.YOLOv3.nms import boxes_nms 9 | 10 | 11 | class YOLOv3(object): 12 | def __init__(self, cfgfile, weightfile, namesfile, score_thresh=0.7, conf_thresh=0.01, nms_thresh=0.45, 13 | is_xywh=False, use_cuda=False): 14 | # net definition 15 | self.net = Darknet(cfgfile) 16 | self.net.load_weights(weightfile) 17 | logger = logging.getLogger("root.detector") 18 | logger.info('Loading weights from %s... Done!' % (weightfile)) 19 | self.device = "cuda" if use_cuda else "cpu" 20 | self.net.eval() 21 | self.net.to(self.device) 22 | 23 | # constants 24 | self.size = self.net.width, self.net.height 25 | self.score_thresh = score_thresh 26 | self.conf_thresh = conf_thresh 27 | self.nms_thresh = nms_thresh 28 | self.use_cuda = use_cuda 29 | self.is_xywh = is_xywh 30 | self.num_classes = self.net.num_classes 31 | self.class_names = self.load_class_names(namesfile) 32 | 33 | def __call__(self, ori_img): 34 | # img to tensor 35 | assert isinstance(ori_img, np.ndarray), "input must be a numpy array!" 36 | img = ori_img.astype(np.float) / 255. 37 | 38 | img = cv2.resize(img, self.size) 39 | img = torch.from_numpy(img).float().permute(2, 0, 1).unsqueeze(0) 40 | 41 | # forward 42 | with torch.no_grad(): 43 | img = img.to(self.device) 44 | out_boxes = self.net(img) 45 | boxes = get_all_boxes(out_boxes, self.conf_thresh, self.num_classes, 46 | use_cuda=self.use_cuda) # batch size is 1 47 | # boxes = nms(boxes, self.nms_thresh) 48 | 49 | boxes = post_process(boxes, self.net.num_classes, self.conf_thresh, self.nms_thresh)[0].cpu() 50 | boxes = boxes[boxes[:, -2] > self.score_thresh, :] # bbox xmin ymin xmax ymax 51 | 52 | if len(boxes) == 0: 53 | bbox = torch.FloatTensor([]).reshape([0, 4]) 54 | cls_conf = torch.FloatTensor([]) 55 | cls_ids = torch.LongTensor([]) 56 | else: 57 | height, width = ori_img.shape[:2] 58 | bbox = boxes[:, :4] 59 | if self.is_xywh: 60 | # bbox x y w h 61 | bbox = xyxy_to_xywh(bbox) 62 | 63 | bbox *= torch.FloatTensor([[width, height, width, height]]) 64 | cls_conf = boxes[:, 5] 65 | cls_ids = boxes[:, 6].long() 66 | return bbox.numpy(), cls_conf.numpy(), cls_ids.numpy() 67 | 68 | def load_class_names(self, namesfile): 69 | with open(namesfile, 'r', encoding='utf8') as fp: 70 | class_names = [line.strip() for line in fp.readlines()] 71 | return class_names 72 | 73 | 74 | def demo(): 75 | import os 76 | from vizer.draw import draw_boxes 77 | 78 | yolo = YOLOv3(r"deepsort/detector/YOLOv3/cfg/yolo_v3.cfg", r"deepsort/detector/YOLOv3/weight/yolov3.weights", r"data/labels/coco.names") 79 | print("yolo.size =", yolo.size) 80 | root = "deepsort/detector/YOLOv3/demo" 81 | resdir = os.path.join(root, "results") 82 | os.makedirs(resdir, exist_ok=True) 83 | files = [os.path.join(root, file) for file in os.listdir(root) if file.endswith('.jpg')] 84 | files.sort() 85 | for filename in files: 86 | img = cv2.imread(filename) 87 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 88 | bbox, cls_conf, cls_ids = yolo(img) 89 | 90 | if bbox is not 
None: 91 | img = draw_boxes(img, bbox, cls_ids, cls_conf, class_name_map=yolo.class_names) 92 | # save results 93 | cv2.imwrite(os.path.join(resdir, os.path.basename(filename)), img[:, :, (2, 1, 0)]) 94 | # imshow 95 | # cv2.namedWindow("yolo", cv2.WINDOW_NORMAL) 96 | # cv2.resizeWindow("yolo", 600,600) 97 | # cv2.imshow("yolo",res[:,:,(2,1,0)]) 98 | # cv2.waitKey(0) 99 | 100 | 101 | if __name__ == "__main__": 102 | demo() 103 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | numpy 4 | opencv-python==4.1.2.30 5 | lxml 6 | tqdm 7 | flask 8 | seaborn 9 | pillow 10 | vizer 11 | numba -------------------------------------------------------------------------------- /result.txt: -------------------------------------------------------------------------------- 1 | video.mp4 -------------------------------------------------------------------------------- /static/ajax-loader.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/static/ajax-loader.gif -------------------------------------------------------------------------------- /static/client.js: -------------------------------------------------------------------------------- 1 | var el = x => document.getElementById(x); 2 | 3 | function showPicker() { 4 | el("file-input").click(); 5 | } 6 | 7 | function showPicked(input) { 8 | el("upload-label").innerHTML = input.files[0].name; 9 | 10 | var reader = new FileReader(); 11 | reader.onload = function (e) { 12 | if (e.target.result.split("/")[0].split(":")[1] == "image"){ 13 | el("image-picked").src = e.target.result; 14 | el("image-picked").className = ""; 15 | el("image-picked1").className = "no-display"; 16 | } 17 | else{ 18 | el("image-picked1").src = e.target.result; 19 | el("image-picked1").className = ""; 20 | el("image-picked").className = "no-display"; 21 | } 22 | }; 23 | reader.readAsDataURL(input.files[0]); 24 | } -------------------------------------------------------------------------------- /static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Transformer-man/yolo-deepsort-flask/e78e030e5ba670625acd29a35aa0ceca96a6dc14/static/logo.png -------------------------------------------------------------------------------- /static/style.css: -------------------------------------------------------------------------------- 1 | .modal { 2 | display: none; 3 | position: fixed; 4 | z-index: 1000; 5 | top: 0; 6 | left: 0; 7 | height: 100%; 8 | width: 100%; 9 | background: rgba( 255, 255, 255, .8 ) 10 | url('/static/ajax-loader.gif') 11 | 50% 50% 12 | no-repeat; 13 | } 14 | 15 | /* When the body has the loading class, we turn 16 | the scrollbar off with overflow:hidden */ 17 | body.loading .modal { 18 | overflow: hidden; 19 | } 20 | 21 | /* Anytime the body has the loading class, our 22 | modal element will be visible */ 23 | body.loading .modal { 24 | display: block; 25 | } -------------------------------------------------------------------------------- /static/style1.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | } 4 | 5 | .no-display { 6 | display: none; 7 | } 8 | 9 | .center { 10 | margin: auto; 11 | padding: 10px 50px; 12 | text-align: 
center; 13 | font-size: 14px; 14 | } 15 | 16 | .title { 17 | font-size: 30px; 18 | margin-top: 1em; 19 | margin-bottom: 1em; 20 | color: #262626; 21 | } 22 | 23 | .content { 24 | margin-top: 10em; 25 | } 26 | 27 | .analyze { 28 | margin-top: 5em; 29 | } 30 | 31 | .upload-label { 32 | padding: 10px; 33 | font-size: 12px; 34 | } 35 | 36 | .result-label { 37 | margin-top: 0.5em; 38 | padding: 10px; 39 | font-size: 13px; 40 | } 41 | 42 | button.choose-file-button { 43 | width: 200px; 44 | height: 40px; 45 | border-radius: 2px; 46 | background-color: #ffffff; 47 | border: solid 1px #ff8100; 48 | font-size: 13px; 49 | color: #ff8100; 50 | } 51 | 52 | button.analyze-button { 53 | width: 200px; 54 | height: 40px; 55 | border: solid 1px #ff8100; 56 | border-radius: 2px; 57 | background-color: #ff8100; 58 | font-size: 13px; 59 | color: #ffffff; 60 | } 61 | 62 | button:focus { 63 | outline: 0; 64 | } 65 | -------------------------------------------------------------------------------- /static/worker.js: -------------------------------------------------------------------------------- 1 | $('#detections').hide() 2 | var $loading = $('#loading').hide(); 3 | 4 | $('#updateCamera').click(function (event) { 5 | event.preventDefault(); 6 | const data = { 7 | "gray": $('#gray').is(":checked"), 8 | "gaussian": $('#gaussian').is(":checked"), 9 | "sobel": $('#sobel').is(":checked"), 10 | "canny": $('#canny').is(":checked"), 11 | } 12 | console.log(data) 13 | $.ajax({ 14 | type: 'POST', 15 | url: '/cameraParams', 16 | data: data, 17 | success: function (success) { 18 | console.log(success) 19 | }, error: function (error) { 20 | console.log(error) 21 | } 22 | }) 23 | }); 24 | 25 | var loadFile = function (event) { 26 | var output = document.getElementById('input'); 27 | output.src = URL.createObjectURL(event.target.files[0]); 28 | }; 29 | 30 | $(document) 31 | .ajaxStart(function () { 32 | $loading.show(); 33 | }) 34 | .ajaxStop(function () { 35 | $loading.hide(); 36 | }); 37 | -------------------------------------------------------------------------------- /templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
[templates/index.html body: the HTML markup was stripped during extraction; the recoverable content is the page heading "Object Detection", a file-upload form, and a "Chosen Image" preview area.]

-------------------------------------------------------------------------------- /templates/index1.html: --------------------------------------------------------------------------------

[templates/index1.html: markup stripped during extraction; recoverable content: page title "yolo deepsort", heading "Target Detection and Multi-Target Tracking Platform", and the same upload form with a "Chosen Image" preview.]

-------------------------------------------------------------------------------- /templates/real-time.html: --------------------------------------------------------------------------------

[templates/real-time.html: markup stripped during extraction; recoverable content: heading "Video Display" and a "Using a video processor:" checkbox list with Gray, YOLO DeepSort, Sobel, and Canny Edge Detector options.]
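The checkboxes on real-time.html are read by static/worker.js (shown earlier) and POSTed as form data to the /cameraParams route defined in text.py below; the handler expects the string values "true"/"false" for each of the four flags, and the "YOLO DeepSort" checkbox corresponds to the gaussian flag. The following is a minimal illustrative sketch of the same request, not part of the repository; it assumes the app is running on its default 127.0.0.1:5000 and that the requests package (not listed in requirements.txt) is installed:

```python
# Illustrative sketch only: toggle the tracking mode the same way
# static/worker.js does, then read the current settings back.
import requests  # extra dependency, not in requirements.txt

BASE = "http://127.0.0.1:5000"  # default host/port from app.run() in text.py

# Form fields exactly as worker.js sends them: the strings "true"/"false".
# "gaussian" is the flag behind the "YOLO DeepSort" checkbox.
flags = {"gray": "false", "gaussian": "true", "sobel": "false", "canny": "false"}

resp = requests.post(f"{BASE}/cameraParams", data=flags)
print(resp.json())  # {'message': 'Success'} when all four flags parse

# A GET on the same route returns the currently stored flags as JSON.
print(requests.get(f"{BASE}/cameraParams").json())
```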
48 | 50 | 53 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /text.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import time 3 | from flask import Flask, request, Response,render_template 4 | import json 5 | from cam.base_camera import BaseCamera 6 | from deepsort.detector import build_detector 7 | from deepsort.deep_sort import build_tracker 8 | from deepsort.utils.draw import draw_boxes 9 | from deepsort.detector.YOLOv3 import YOLOv3 10 | from numba import njit 11 | from vizer.draw import draw_boxes as db 12 | # Initialize Flask application 13 | import os 14 | os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE" 15 | app = Flask(__name__) 16 | 17 | class_names = [c.strip() for c in open(r'cam/coco.names').readlines()] 18 | 19 | yolo = YOLOv3(r"deepsort/detector/YOLOv3/cfg/yolo_v3.cfg", r"deepsort/detector/YOLOv3/weight/yolov3.weights", 20 | r"cam/coco.names") 21 | detector = build_detector(use_cuda=False) 22 | deepsort = build_tracker(use_cuda=False) 23 | 24 | file_name = ['jpg','jpeg','png'] 25 | video_name = ['mp4','avi'] 26 | 27 | # API that returns image with detections on it 28 | @njit(parallel = True) 29 | @app.route('/images', methods= ['POST']) 30 | def get_image(): 31 | image = request.files["images"] 32 | image_name = image.filename 33 | 34 | if image_name.split('.')[-1] in video_name: 35 | with open('./result.txt', 'w') as f: 36 | f.write(image_name) 37 | 38 | image.save(os.path.join(os.getcwd(), image_name)) 39 | 40 | if image_name.split(".")[-1] in file_name: 41 | a = time.time() 42 | img = cv2.imread(image_name) 43 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 44 | bbox, cls_conf, cls_ids = yolo(img) 45 | if bbox is not None: 46 | img = db(img, bbox, cls_ids, cls_conf, class_name_map=class_names) 47 | img = img[:, :, (2, 1, 0)] 48 | _, img_encoded = cv2.imencode('.png', img) 49 | response = img_encoded.tobytes() 50 | print(time.time()-a) 51 | # os.remove(image_name) 52 | try: 53 | return Response(response=response, status=200, mimetype='image/png') 54 | except: 55 | return render_template('index1.html') 56 | else: 57 | return render_template('real-time.html') 58 | 59 | 60 | class Camera(BaseCamera): 61 | @staticmethod 62 | def frames(): 63 | with open('./result.txt', 'r') as f: 64 | image_name = f.read() 65 | fi_name = image_name 66 | cam = cv2.VideoCapture(image_name) 67 | y = 0 68 | sum = 0 69 | a = time.time() 70 | while True: 71 | b = time.time() - a 72 | if b > 80: 73 | break 74 | with open('./result.txt', 'r') as f: 75 | image_name = f.read() 76 | if image_name != fi_name: 77 | break 78 | ret,img = cam.read() 79 | if ret: 80 | if CameraParams.gray: 81 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 82 | yield cv2.imencode('.jpg', img)[1].tobytes() 83 | elif CameraParams.gaussian: 84 | if y == 0: 85 | cam = cv2.VideoCapture(image_name) 86 | sum = sum - 1 87 | y = 1 88 | de_sum = [] 89 | im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 90 | bbox_xywh, cls_conf, cls_ids = detector(im) 91 | 92 | mask = cls_ids == 0 93 | new_bbox_xywh = bbox_xywh[mask] 94 | new_bbox_xywh[:, 3:] *= 1.2 95 | 96 | new_cls_conf = cls_conf[mask] 97 | outputs = deepsort.update(new_bbox_xywh, new_cls_conf, im) 98 | if len(outputs) > 0: 99 | bbox_xyxy = outputs[:, :4] 100 | identities = outputs[:, -1] 101 | for id in identities: 102 | # if id not in de_sum: 103 | de_sum.append(id) 104 | 105 | img = draw_boxes(img, bbox_xyxy, identities) 106 | de_sum = set(de_sum) 107 | text = "people " 108 | if (len(de_sum) 
> 0): 109 | text = text + str(len(de_sum)) 110 | else: 111 | text = text + str(0) 112 | cv2.putText(img, text, (30, 50), cv2.FONT_HERSHEY_COMPLEX, 2, (250, 250, 0), 8) 113 | yield cv2.imencode('.jpg', img)[1].tobytes() 114 | elif CameraParams.sobel: 115 | img = cv2.Sobel(img,cv2.CV_64F,1,0,ksize=5) # x 116 | img = cv2.Sobel(img,cv2.CV_64F,0,1,ksize=5) # y 117 | yield cv2.imencode('.jpg', img)[1].tobytes() 118 | elif CameraParams.canny: 119 | img = cv2.Canny(img, 100, 200, 3, L2gradient=True) 120 | yield cv2.imencode('.jpg', img)[1].tobytes() 121 | else: 122 | cam = cv2.VideoCapture(image_name) 123 | sum = sum + 1 124 | if sum == 2: 125 | break 126 | class CameraParams(): 127 | 128 | gray = False 129 | gaussian = False 130 | sobel = False 131 | canny = False 132 | 133 | @njit(parallel = True) 134 | def __init__(self, gray, gaussian, sobel, canny): 135 | self.gray = gray 136 | self.gaussian = gaussian 137 | self.sobel = sobel 138 | self.canny = canny 139 | 140 | @njit(parallel = True) 141 | @app.route('/') 142 | def upload_file(): 143 | return render_template('index1.html') 144 | 145 | @njit(parallel = True) 146 | @app.route('/cameraParams', methods=['GET', 'POST']) 147 | def cameraParams(): 148 | if request.method == 'GET': 149 | data = { 150 | 'gray': CameraParams.gray, 151 | 'gaussian': CameraParams.gaussian, 152 | 'sobel': CameraParams.sobel, 153 | 'canny': CameraParams.canny, 154 | } 155 | return app.response_class(response=json.dumps(data), 156 | status=200, 157 | mimetype='application/json') 158 | elif request.method == 'POST': 159 | try: 160 | data = request.form.to_dict() 161 | CameraParams.gray = str_to_bool(data['gray']) 162 | CameraParams.gaussian = str_to_bool(data['gaussian']) 163 | CameraParams.sobel = str_to_bool(data['sobel']) 164 | CameraParams.canny = str_to_bool(data['canny']) 165 | message = {'message': 'Success'} 166 | response = app.response_class(response=json.dumps(message), 167 | status=200, 168 | mimetype='application/json') 169 | return response 170 | except Exception as e: 171 | print(e) 172 | response = app.response_class(response=json.dumps(e), 173 | status=400, 174 | mimetype='application/json') 175 | return response 176 | else: 177 | data = { "error": "Method not allowed. Please GET or POST request!" } 178 | return app.response_class(response=json.dumps(data), 179 | status=400, 180 | mimetype='application/json') 181 | 182 | @njit(parallel = True) 183 | @app.route('/realtime') 184 | def realtime(): 185 | return render_template('real-time.html') 186 | 187 | ########get path 188 | @njit(parallel = True) 189 | @app.route('/video_feed') 190 | def video_feed(): 191 | """Video streaming route. Put this in the src attribute of an img tag.""" 192 | return Response(genWeb(Camera()), 193 | mimetype='multipart/x-mixed-replace; boundary=frame') 194 | 195 | def genWeb(camera): 196 | """Video streaming generator function.""" 197 | while True: 198 | frame = camera.get_frame() 199 | yield (b'--frame\r\n' 200 | b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n') 201 | @njit() 202 | def str_to_bool(s): 203 | if s == "true": 204 | return True 205 | elif s == "false": 206 | return False 207 | else: 208 | raise ValueError 209 | 210 | if __name__ == '__main__': 211 | app.run(debug=True, host = '127.0.0.1', port=5000) 212 | --------------------------------------------------------------------------------
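For completeness, a minimal illustrative sketch of exercising the /images upload route above from a separate client; it is not part of the repository and assumes the server is running locally on 127.0.0.1:5000, the requests package is installed, and the bundled demo/dog.jpg is used as input:

```python
# Illustrative sketch only: POST an image to the /images route and save the
# annotated PNG the handler returns.
import requests  # extra dependency, not in requirements.txt

URL = "http://127.0.0.1:5000/images"

with open("demo/dog.jpg", "rb") as f:
    # The handler reads request.files["images"] and branches on the file
    # extension: jpg/jpeg/png runs YOLOv3 detection and returns a PNG,
    # while mp4/avi records the filename and serves the tracking page.
    resp = requests.post(URL, files={"images": ("dog.jpg", f, "image/jpeg")})

if resp.headers.get("Content-Type", "").startswith("image/"):
    with open("dog_detected.png", "wb") as out:
        out.write(resp.content)  # image with boxes drawn by vizer's draw_boxes
else:
    print(resp.text[:200])  # an HTML page was returned instead
```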