├── .gitignore ├── LICENSE ├── README.md ├── assets ├── sample-inf.txt ├── sample_inf.png ├── sort-io.png ├── sort-mod.png ├── sort-mod.txt └── velon-2019-creds.gif ├── classy_track.py ├── download_weights.sh ├── requirements.txt ├── sort ├── .gitignore ├── LICENSE ├── README.md ├── Untitled.ipynb ├── data │ └── train │ │ ├── ADL-Rundle-6 │ │ └── det │ │ │ └── det.txt │ │ ├── ADL-Rundle-8 │ │ └── det │ │ │ └── det.txt │ │ ├── ETH-Bahnhof │ │ └── det │ │ │ └── det.txt │ │ ├── ETH-Pedcross2 │ │ └── det │ │ │ └── det.txt │ │ ├── ETH-Sunnyday │ │ └── det │ │ │ └── det.txt │ │ ├── KITTI-13 │ │ └── det │ │ │ └── det.txt │ │ ├── KITTI-17 │ │ └── det │ │ │ └── det.txt │ │ ├── PETS09-S2L1 │ │ └── det │ │ │ └── det.txt │ │ ├── TUD-Campus │ │ └── det │ │ │ └── det.txt │ │ ├── TUD-Stadtmitte │ │ └── det │ │ │ └── det.txt │ │ └── Venice-2 │ │ └── det │ │ └── det.txt ├── requirements.txt └── sort.py └── yolov5 ├── .dockerignore ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ ├── feature-request.md │ └── question.md └── workflows │ ├── ci-testing.yml │ ├── codeql-analysis.yml │ ├── greetings.yml │ ├── rebase.yml │ └── stale.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── detect.py ├── hubconf.py ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── test.py ├── train.py ├── tutorial.ipynb ├── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── datasets.py ├── general.py ├── google_app_engine │ ├── Dockerfile │ ├── additional_requirements.txt │ └── app.yaml ├── google_utils.py ├── loss.py ├── metrics.py ├── plots.py └── torch_utils.py └── weights └── download_weights.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | #PyTorch weights 132 | *.pt 133 | *.pth 134 | 135 | #Video files (output) 136 | *.mp4 137 | 138 | #output folder 139 | inference/output/ 140 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ℂ𝕝𝕒𝕤𝕤𝕪𝕊𝕆ℝ𝕋 2 | 3 | by [Jason Sohn (website: jasonsohn.com)](https://jasonsohn.com) 4 | 5 | ClassySORT is a simple real-time multi-object tracker (MOT) that works for any kind of object class (not just people). 6 | 7 | ![demo-footage](assets/velon-2019-creds.gif) 8 | 9 | ## Introduction 10 | 11 | ClassySORT is designed to be a state-of-the-art (SOTA) multi-object tracker (MOT) for use in your own projects. Because the You-Only-Look-Once (YOLO) detector is pretrained on the COCO dataset, ClassySORT can detect, count, and track 80 kinds of common objects 'out of the box'. 12 | 13 | Tested on Pop!_OS 20.04 (Ubuntu-based) with an NVIDIA RTX 2070S. 14 | 15 | Adapting it to your own object classes is the same process as training YOLOv5 with your own dataset. [How do I do that?](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) 16 | 17 | **ClassySORT implements** 18 | + [ultralytics/YOLOv5](https://github.com/ultralytics/yolov5/wiki) with no modifications 19 | + [abewley/SORT](https://github.com/abewley/sort) with minor modifications 20 | 21 | This repository pins a fixed version of YOLOv5 to ensure compatibility. Replacing the bundled YOLOv5 code with the latest ultralytics/YOLOv5 code may introduce breaking changes. If you manage to do this without issues, please submit a pull request. 22 | 23 | If you only need to track people, or have the resources to train a model from scratch with your own dataset, see the 'More Complex MOTs' section below. 24 | 25 | ## Using ClassySORT 26 | 27 | Clone this repository: 28 | 29 | ```bash 30 | git clone https://github.com/tensorturtle/classy-sort-yolov5.git 31 | cd classy-sort-yolov5 32 | ``` 33 | 34 | ### Install Requirements 35 | 36 | Python 3.8 or later is required, with all dependencies in requirements.txt installed. To install them, run: 37 | 38 | ```bash 39 | pip install -r requirements.txt 40 | ``` 41 | 42 | ### Download YOLOv5 weights 43 | 44 | ```bash 45 | ./download_weights.sh 46 | ``` 47 | This script saves the YOLOv5 weights to the `yolov5/weights/` directory.
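To sanity-check the download, here is a minimal sketch that loads the checkpoint the same way `classy_track.py` does (the `yolov5s.pt` path below assumes the default download location; adjust it if you downloaded a different model):

```python
import sys
import torch

sys.path.insert(0, './yolov5')  # so the pickled YOLOv5 model classes can be imported, as classy_track.py does

# The .pt file is a dictionary; the network itself is stored under the 'model' key.
ckpt = torch.load('yolov5/weights/yolov5s.pt', map_location='cpu')
model = ckpt['model'].float().eval()
print(len(model.names), 'COCO classes, e.g.', model.names[:3])
```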
48 | 49 | ### Run Tracking 50 | 51 | To run the tracker on your own video and view the tracked bounding boxes, run: 52 | 53 | ```bash 54 | python classy_track.py --source /path/to/video.mp4 --view-img 55 | ``` 56 | 57 | To get a summary of all arguments, run: 58 | 59 | ```bash 60 | python classy_track.py -h 61 | ``` 62 | 63 | With `--save-txt`, the text results are saved to `inference/output/results.txt` in the format below. That point in the script is also a good place to plug your own downstream programs into (a short parsing sketch follows the sample inference log further down). 64 | 65 | Each line of the saved text file contains the following information: 66 | 67 | ```bash 68 | [frame_index, x_left_top, y_left_top, x_right_bottom, y_right_bottom, object_category, u_dot, v_dot, s_dot, object_id] 69 | ``` 70 | 71 | where 72 | 73 | + u_dot: time derivative (per frame) of the bbox center x-coordinate, in pixels 74 | + v_dot: time derivative (per frame) of the bbox center y-coordinate, in pixels 75 | + s_dot: time derivative (per frame) of the bbox scale (area) 76 | 77 | ## Implementation Details 78 | 79 | ### Modifications to SORT 80 | 81 | #### 1. Class-aware Tracking 82 | 83 | The original implementation of SORT threw away YOLO's object class information (0: person, 1: bike, etc.). 84 | I wanted to keep that information, so I added a `detclass` attribute to the `KalmanBoxTracker` class in `sort.py`: 85 | 86 | ![modifications_to_sort_schematic](assets/sort-mod.png) 87 | 88 | #### 2. Kalman Filter parameters 89 | 90 | I found that for my own dataset, in which bounding boxes change size fairly quickly, the default Q values (process noise covariance) were too low. I recommend experimenting with these parameters for your own data. 91 | 92 | 93 | ## More Complex MOTs 94 | If you only need to track people, or have the resources to train a model from scratch with your own dataset, then I recommend [mikel-brostrom/Yolov5_DeepSort_Pytorch](https://github.com/mikel-brostrom/Yolov5_DeepSort_Pytorch). 95 | DeepSORT adds a separately trained neural network on top of SORT, which increases accuracy for human detections but slightly decreases performance. 96 | It also means that using your custom dataset involves training both YOLO and DeepSORT's 'deep association metric'. 97 | 98 | For a 'bag of tricks' optimized version of YOLOv5 + DeepSORT, see [GeekAlexis/FastMOT](https://github.com/GeekAlexis/FastMOT). 99 | 100 | ## License 101 | 102 | ClassySORT is released under the GNU General Public License version 3 (GPLv3) to promote the open use of the tracker and future improvements. 103 | Among other things, this means that code from this repository cannot be used in closed-source distributions, 104 | and you must license any derived code under GPLv3 as well. 105 | -------------------------------------------------------------------------------- /assets/sample-inf.txt: -------------------------------------------------------------------------------- 1 | $ python classy_track.py --source /path/to/video.mp4 --view-img --save-txt --save-img 2 | 3 | ... 4 | 5 | torch.Size([640, 1080]) - 1 person - 2 bicycle Done. (0.00942351224562) 6 | saving img! 7 | saving video! 8 | video 1/1 (234/250) /path/to/video.mp4 9 | 10 | Input into SORT: 11 | [[ 1308 530 1567 834 0.89697 1] 12 | [ 1317 283 1552 698 0.8501 0] 13 | [ 732 495 753 537 0.35449 1]] 14 | 15 | Output from SORT: 16 | [[ 1322.2 278.85 1546.5 701.64 1 29] 17 | [ 733.19 494.52 751.8 536.72 0 16] 18 | [ 1315.3 527.35 1559.1 835.45 1 2]] 19 | 20 | ...
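A minimal sketch for reading the saved results back in (this assumes `--save-txt` was used and that the default `inference/output/results.txt` path written by `classy_track.py` is unchanged; the column order is the one documented in the README above):

```python
import numpy as np

# Each row: frame_idx, x1, y1, x2, y2, class_id, u_dot, v_dot, s_dot, track_id
results = np.atleast_2d(np.loadtxt('inference/output/results.txt', delimiter=','))

track_id = 2  # hypothetical ID; use any value that appears in the last column
rows = results[results[:, 9] == track_id]
centers_x = (rows[:, 1] + rows[:, 3]) / 2  # bbox center x per frame
centers_y = (rows[:, 2] + rows[:, 4]) / 2  # bbox center y per frame
print(f'track {track_id}: seen in {len(rows)} frames')
```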
21 | 22 | -------------------------------------------------------------------------------- /assets/sample_inf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/assets/sample_inf.png -------------------------------------------------------------------------------- /assets/sort-io.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/assets/sort-io.png -------------------------------------------------------------------------------- /assets/sort-mod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/assets/sort-mod.png -------------------------------------------------------------------------------- /assets/sort-mod.txt: -------------------------------------------------------------------------------- 1 | class KalmanBoxTracker(object): 2 | ''' 3 | This class represents the internal state of individual objects observed as bbox 4 | ''' 5 | def __init__(self, bbox): 6 | ... 7 | self.detclass = bbox[5] #detected object class is stored as object attribute 8 | 9 | def update(self, bbox): 10 | ... 11 | 12 | def predict(self): 13 | ... 14 | -------------------------------------------------------------------------------- /assets/velon-2019-creds.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/assets/velon-2019-creds.gif -------------------------------------------------------------------------------- /classy_track.py: -------------------------------------------------------------------------------- 1 | """ 2 | ClassySORT 3 | 4 | YOLOv5 (object detection) + vanilla SORT (multi-object tracker) implementation 5 | that is aware of the tracked object category. 6 | 7 | This is for people who want a real-time multiple object tracker (MOT) 8 | that can track any kind of object with no additional training. 9 | 10 | If you only need to track people, then I recommend YOLOv5 + DeepSORT implementations. 11 | DeepSORT adds a separately trained neural network on top of SORT, 12 | which increases accuracy for human detections but decreases performance slightly. 13 | 14 | 15 | Copyright (C) 2020-2021 Jason Sohn tensorturtle@gmail.com 16 | 17 | 18 | === start GNU License === 19 | 20 | This program is free software: you can redistribute it and/or modify 21 | it under the terms of the GNU General Public License as published by 22 | the Free Software Foundation, either version 3 of the License, or 23 | (at your option) any later version. 24 | 25 | This program is distributed in the hope that it will be useful, 26 | but WITHOUT ANY WARRANTY; without even the implied warranty of 27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 28 | GNU General Public License for more details. 29 | 30 | You should have received a copy of the GNU General Public License 31 | along with this program. If not, see <https://www.gnu.org/licenses/>.
32 | 33 | === end GNU License === 34 | """ 35 | 36 | # python interpreter searchs these subdirectories for modules 37 | import sys 38 | sys.path.insert(0, './yolov5') 39 | sys.path.insert(0, './sort') 40 | 41 | import argparse 42 | import os 43 | import platform 44 | import shutil 45 | import time 46 | from pathlib import Path 47 | import cv2 48 | import torch 49 | import torch.backends.cudnn as cudnn 50 | 51 | #yolov5 52 | from yolov5.utils.datasets import LoadImages, LoadStreams 53 | from yolov5.utils.general import check_img_size, non_max_suppression, scale_coords 54 | from yolov5.utils.torch_utils import select_device, time_synchronized 55 | 56 | #SORT 57 | import skimage 58 | from sort import * 59 | 60 | torch.set_printoptions(precision=3) 61 | 62 | palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1) 63 | 64 | 65 | def bbox_rel(*xyxy): 66 | """" Calculates the relative bounding box from absolute pixel values. """ 67 | bbox_left = min([xyxy[0].item(), xyxy[2].item()]) 68 | bbox_top = min([xyxy[1].item(), xyxy[3].item()]) 69 | bbox_w = abs(xyxy[0].item() - xyxy[2].item()) 70 | bbox_h = abs(xyxy[1].item() - xyxy[3].item()) 71 | x_c = (bbox_left + bbox_w / 2) 72 | y_c = (bbox_top + bbox_h / 2) 73 | w = bbox_w 74 | h = bbox_h 75 | return x_c, y_c, w, h 76 | 77 | 78 | def compute_color_for_labels(label): 79 | """ 80 | Simple function that adds fixed color depending on the class 81 | """ 82 | color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette] 83 | return tuple(color) 84 | 85 | 86 | def draw_boxes(img, bbox, identities=None, categories=None, names=None, offset=(0, 0)): 87 | for i, box in enumerate(bbox): 88 | x1, y1, x2, y2 = [int(i) for i in box] 89 | x1 += offset[0] 90 | x2 += offset[0] 91 | y1 += offset[1] 92 | y2 += offset[1] 93 | # box text and bar 94 | cat = int(categories[i]) if categories is not None else 0 95 | 96 | id = int(identities[i]) if identities is not None else 0 97 | 98 | color = compute_color_for_labels(id) 99 | 100 | label = f'{names[cat]} | {id}' 101 | t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2, 2)[0] 102 | cv2.rectangle(img, (x1, y1), (x2, y2), color, 3) 103 | cv2.rectangle( 104 | img, (x1, y1), (x1 + t_size[0] + 3, y1 + t_size[1] + 4), color, -1) 105 | cv2.putText(img, label, (x1, y1 + 106 | t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 2, [255, 255, 255], 2) 107 | return img 108 | 109 | def detect(opt, *args): 110 | out, source, weights, view_img, save_txt, imgsz, save_img, sort_max_age, sort_min_hits, sort_iou_thresh= \ 111 | opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size, opt.save_img, opt.sort_max_age, opt.sort_min_hits, opt.sort_iou_thresh 112 | 113 | webcam = source == '0' or source.startswith( 114 | 'rtsp') or source.startswith('http') or source.endswith('.txt') 115 | # Initialize SORT 116 | sort_tracker = Sort(max_age=sort_max_age, 117 | min_hits=sort_min_hits, 118 | iou_threshold=sort_iou_thresh) # {plug into parser} 119 | 120 | 121 | # Directory and CUDA settings for yolov5 122 | device = select_device(opt.device) 123 | if os.path.exists(out): 124 | shutil.rmtree(out) # delete output folder 125 | os.makedirs(out) # make new output folder 126 | half = device.type != 'cpu' # half precision only supported on CUDA 127 | 128 | # Load yolov5 model 129 | model = torch.load(weights, map_location=device)['model'].float() #load to FP32. 
yolov5s.pt file is a dictionary, so we retrieve the model by indexing its key 130 | model.to(device).eval() 131 | if half: 132 | model.half() #to FP16 133 | 134 | # Set DataLoader 135 | vid_path, vid_writer = None, None 136 | 137 | if webcam: 138 | view_img = True 139 | cudnn.benchmark = True # set True to speed up constant image size inference 140 | dataset = LoadStreams(source, img_size=imgsz) 141 | else: 142 | dataset = LoadImages(source, img_size=imgsz) 143 | 144 | # get names of object categories from yolov5.pt model 145 | names = model.module.names if hasattr(model, 'module') else model.names 146 | 147 | # Run inference 148 | t0 = time.time() 149 | img = torch.zeros((1,3,imgsz,imgsz), device=device) #init img 150 | 151 | # Run once (throwaway) 152 | _ = model(img.half() if half else img) if device.type != 'cpu' else None 153 | 154 | save_path = str(Path(out)) 155 | txt_path = str(Path(out))+'/results.txt' 156 | 157 | for frame_idx, (path, img, im0s, vid_cap) in enumerate(dataset): #for every frame 158 | img= torch.from_numpy(img).to(device) 159 | img = img.half() if half else img.float() #unint8 to fp16 or fp32 160 | img /= 255.0 #normalize to between 0 and 1. 161 | if img.ndimension()==3: 162 | img = img.unsqueeze(0) 163 | 164 | # Inference 165 | t1 = time_synchronized() 166 | pred = model(img, augment=opt.augment)[0] 167 | 168 | # Apply NMS 169 | pred = non_max_suppression( 170 | pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) 171 | t2 = time_synchronized() 172 | 173 | # Process detections 174 | for i, det in enumerate(pred): #for each detection in this frame 175 | if webcam: # batch_size >= 1 176 | p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() 177 | else: 178 | p, s, im0 = path, '', im0s 179 | 180 | s += f'{img.shape[2:]}' #print image size and detection report 181 | save_path = str(Path(out) / Path(p).name) 182 | 183 | # Rescale boxes from img_size (temporarily downscaled size) to im0 (native) size 184 | det[:, :4] = scale_coords( 185 | img.shape[2:], det[:, :4], im0.shape).round() 186 | 187 | for c in det[:, -1].unique(): #for each unique object category 188 | n = (det[:, -1] ==c).sum() #number of detections per class 189 | s += f' - {n} {names[int(c)]}' 190 | 191 | dets_to_sort = np.empty((0,6)) 192 | 193 | # Pass detections to SORT 194 | # NOTE: We send in detected object class too 195 | for x1,y1,x2,y2,conf,detclass in det.cpu().detach().numpy(): 196 | dets_to_sort = np.vstack((dets_to_sort, np.array([x1, y1, x2, y2, conf, detclass]))) 197 | print('\n') 198 | print('Input into SORT:\n',dets_to_sort,'\n') 199 | 200 | # Run SORT 201 | tracked_dets = sort_tracker.update(dets_to_sort) 202 | 203 | print('Output from SORT:\n',tracked_dets,'\n') 204 | 205 | 206 | # draw boxes for visualization 207 | if len(tracked_dets)>0: 208 | bbox_xyxy = tracked_dets[:,:4] 209 | identities = tracked_dets[:, 8] 210 | categories = tracked_dets[:, 4] 211 | draw_boxes(im0, bbox_xyxy, identities, categories, names) 212 | 213 | # Write detections to file. NOTE: Not MOT-compliant format. 
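# Each row of tracked_dets (see KalmanBoxTracker.get_state() and Sort.update() in sort.py):
# [x1, y1, x2, y2, object_class, u_dot, v_dot, s_dot, track_id]
# This is why column 4 is read as the category, column 8 as the identity, and columns 5-7 as the derivatives.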
214 | if save_txt and len(tracked_dets) != 0: 215 | for j, tracked_dets in enumerate(tracked_dets): 216 | bbox_x1 = tracked_dets[0] 217 | bbox_y1 = tracked_dets[1] 218 | bbox_x2 = tracked_dets[2] 219 | bbox_y2 = tracked_dets[3] 220 | category = tracked_dets[4] 221 | u_overdot = tracked_dets[5] 222 | v_overdot = tracked_dets[6] 223 | s_overdot = tracked_dets[7] 224 | identity = tracked_dets[8] 225 | 226 | with open(txt_path, 'a') as f: 227 | f.write(f'{frame_idx},{bbox_x1},{bbox_y1},{bbox_x2},{bbox_y2},{category},{u_overdot},{v_overdot},{s_overdot},{identity}\n') 228 | 229 | print(f'{s} Done. ({t2-t1})') 230 | # Stream image results(opencv) 231 | if view_img: 232 | cv2.imshow(p,im0) 233 | if cv2.waitKey(1)==ord('q'): #q to quit 234 | raise StopIteration 235 | # Save video results 236 | if save_img: 237 | print('saving img!') 238 | if dataset.mode == 'image': 239 | cv2.imwrite(save_path, im0) 240 | else: 241 | print('saving video!') 242 | if vid_path != save_path: # new video 243 | vid_path = save_path 244 | if isinstance(vid_writer, cv2.VideoWriter): 245 | vid_writer.release() # release previous video writer 246 | 247 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 248 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 249 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 250 | vid_writer = cv2.VideoWriter( 251 | save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 252 | vid_writer.write(im0) 253 | if save_txt or save_img: 254 | print('Results saved to %s' % os.getcwd() + os.sep + out) 255 | if platform == 'darwin': # MacOS 256 | os.system('open ' + save_path) 257 | 258 | print('Done. (%.3fs)' % (time.time() - t0)) 259 | 260 | if __name__ == '__main__': 261 | 262 | parser = argparse.ArgumentParser() 263 | parser.add_argument('--weights', type=str, 264 | default='yolov5/weights/yolov5s.pt', help='model.pt path') 265 | # file/folder, 0 for webcam 266 | parser.add_argument('--source', type=str, 267 | default='inference/images', help='source') 268 | parser.add_argument('--output', type=str, default='inference/output', 269 | help='output folder') # output folder 270 | parser.add_argument('--img-size', type=int, default=1080, 271 | help='inference size (pixels)') 272 | parser.add_argument('--conf-thres', type=float, 273 | default=0.3, help='object confidence threshold') 274 | parser.add_argument('--iou-thres', type=float, 275 | default=0.4, help='IOU threshold for NMS') 276 | parser.add_argument('--fourcc', type=str, default='mp4v', 277 | help='output video codec (verify ffmpeg support)') 278 | parser.add_argument('--device', default='', 279 | help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 280 | parser.add_argument('--view-img', action='store_true', 281 | help='display results') 282 | parser.add_argument('--save-img', action='store_true', 283 | help='save video file to output folder (disable for speed)') 284 | parser.add_argument('--save-txt', action='store_true', 285 | help='save results to *.txt') 286 | parser.add_argument('--classes', nargs='+', type=int, 287 | default=[i for i in range(80)], help='filter by class') #80 classes in COCO dataset 288 | parser.add_argument('--agnostic-nms', action='store_true', 289 | help='class-agnostic NMS') 290 | parser.add_argument('--augment', action='store_true', 291 | help='augmented inference') 292 | 293 | #SORT params 294 | parser.add_argument('--sort-max-age', type=int, default=5, 295 | help='keep track of object even if object is occluded or not detected in n frames') 296 | parser.add_argument('--sort-min-hits', type=int, default=2, 297 | help='start tracking only after n number of objects detected') 298 | parser.add_argument('--sort-iou-thresh', type=float, default=0.2, 299 | help='intersection-over-union threshold between two frames for association') 300 | 301 | args = parser.parse_args() 302 | args.img_size = check_img_size(args.img_size) 303 | print(args) 304 | 305 | with torch.no_grad(): 306 | detect(args) 307 | -------------------------------------------------------------------------------- /download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download latest models from https://github.com/ultralytics/yolov5/releases 3 | # Usage: 4 | # $ bash weights/download_weights.sh 5 | 6 | python - <=3.2.2 8 | numpy>=1.18.5 9 | opencv-python>=4.1.2 10 | Pillow 11 | PyYAML>=5.3 12 | scipy>=1.4.1 13 | tensorboard>=2.2 14 | torch>=1.7.0 15 | torchvision>=0.8.1 16 | tqdm>=4.41.0 17 | seaborn>=0.11.0 18 | pandas 19 | 20 | #SORT-------------------------------- 21 | filterpy==1.4.5 22 | scikit-image==0.17.2 23 | numpy 24 | lap==0.4.0 25 | -------------------------------------------------------------------------------- /sort/.gitignore: -------------------------------------------------------------------------------- 1 | output/ 2 | mot_benchmark 3 | -------------------------------------------------------------------------------- /sort/README.md: -------------------------------------------------------------------------------- 1 | SORT 2 | ===== 3 | 4 | A simple online and realtime tracking algorithm for 2D multiple object tracking in video sequences. 5 | See an example [video here](https://alex.bewley.ai/misc/SORT-MOT17-06-FRCNN.webm). 6 | 7 | By Alex Bewley 8 | 9 | ### Introduction 10 | 11 | SORT is a barebones implementation of a visual multiple object tracking framework based on rudimentary data association and state estimation techniques. It is designed for online tracking applications where only past and current frames are available and the method produces object identities on the fly. While this minimalistic tracker doesn't handle occlusion or re-entering objects its purpose is to serve as a baseline and testbed for the development of future trackers. 12 | 13 | SORT was initially described in [this paper](http://arxiv.org/abs/1602.00763). At the time of the initial publication, SORT was ranked the best *open source* multiple object tracker on the [MOT benchmark](https://motchallenge.net/results/2D_MOT_2015/). 14 | 15 | **Note:** A significant proportion of SORT's accuracy is attributed to the detections. 
16 | For your convenience, this repo also contains *Faster* RCNN detections for the MOT benchmark sequences in the [benchmark format](https://motchallenge.net/instructions/). To run the detector yourself please see the original [*Faster* RCNN project](https://github.com/ShaoqingRen/faster_rcnn) or the python reimplementation of [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by Ross Girshick. 17 | 18 | **Also see:** 19 | A new and improved version of SORT with a Deep Association Metric implemented in tensorflow is available at [https://github.com/nwojke/deep_sort](https://github.com/nwojke/deep_sort) . 20 | 21 | ### License 22 | 23 | SORT is released under the GPL License (refer to the LICENSE file for details) to promote the open use of the tracker and future improvements. If you require a permissive license contact Alex (alex@bewley.ai). 24 | 25 | ### Citing SORT 26 | 27 | If you find this repo useful in your research, please consider citing: 28 | 29 | @inproceedings{Bewley2016_sort, 30 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 31 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 32 | title={Simple online and realtime tracking}, 33 | year={2016}, 34 | pages={3464-3468}, 35 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 36 | doi={10.1109/ICIP.2016.7533003} 37 | } 38 | 39 | 40 | ### Dependencies: 41 | 42 | To install required dependencies run: 43 | ``` 44 | $ pip install -r requirements.txt 45 | ``` 46 | 47 | 48 | ### Demo: 49 | 50 | To run the tracker with the provided detections: 51 | 52 | ``` 53 | $ cd path/to/sort 54 | $ python sort.py 55 | ``` 56 | 57 | To display the results you need to: 58 | 59 | 1. Download the [2D MOT 2015 benchmark dataset](https://motchallenge.net/data/2D_MOT_2015/#download) 60 | 0. Create a symbolic link to the dataset 61 | ``` 62 | $ ln -s /path/to/MOT2015_challenge/data/2DMOT2015 mot_benchmark 63 | ``` 64 | 0. Run the demo with the ```--display``` flag 65 | ``` 66 | $ python sort.py --display 67 | ``` 68 | 69 | 70 | ### Main Results 71 | 72 | Using the [MOT challenge devkit](https://motchallenge.net/devkit/) the method produces the following results (as described in the paper). 73 | 74 | Sequence | Rcll | Prcn | FAR | GT MT PT ML| FP FN IDs FM| MOTA MOTP MOTAL 75 | --------------- |:----:|:----:|:----:|:-------------:|:-------------------:|:------------------: 76 | TUD-Campus | 68.5 | 94.3 | 0.21 | 8 6 2 0| 15 113 6 9| 62.7 73.7 64.1 77 | ETH-Sunnyday | 77.5 | 81.9 | 0.90 | 30 11 16 3| 319 418 22 54| 59.1 74.4 60.3 78 | ETH-Pedcross2 | 51.9 | 90.8 | 0.39 | 133 17 60 56| 330 3014 77 103| 45.4 74.8 46.6 79 | ADL-Rundle-8 | 44.3 | 75.8 | 1.47 | 28 6 16 6| 959 3781 103 211| 28.6 71.1 30.1 80 | Venice-2 | 42.5 | 64.8 | 2.75 | 26 7 9 10| 1650 4109 57 106| 18.6 73.4 19.3 81 | KITTI-17 | 67.1 | 92.3 | 0.26 | 9 1 8 0| 38 225 9 16| 60.2 72.3 61.3 82 | *Overall* | 49.5 | 77.5 | 1.24 | 234 48 111 75| 3311 11660 274 499| 34.0 73.3 35.1 83 | 84 | 85 | ### Using SORT in your own project 86 | 87 | Below is the gist of how to instantiate and update SORT. See the ['__main__'](https://github.com/abewley/sort/blob/master/sort.py#L239) section of [sort.py](https://github.com/abewley/sort/blob/master/sort.py#L239) for a complete example. 88 | 89 | from sort import * 90 | 91 | #create instance of SORT 92 | mot_tracker = Sort() 93 | 94 | # get detections 95 | ... 
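    # NOTE: in ClassySORT's modified sort.py, each detection row passed to update() must be
    # [x1, y1, x2, y2, confidence, class]; vanilla SORT as described in this README expects [x1, y1, x2, y2, score].
    # e.g. detections = np.array([[100., 80., 260., 300., 0.9, 0.]])  # one 'person' detection (hypothetical values)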
96 | 97 | # update SORT 98 | track_bbs_ids = mot_tracker.update(detections) 99 | 100 | # track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column) 101 | ... 102 | 103 | 104 | -------------------------------------------------------------------------------- /sort/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [] 9 | } 10 | ], 11 | "metadata": { 12 | "kernelspec": { 13 | "display_name": "Python 3", 14 | "language": "python", 15 | "name": "python3" 16 | }, 17 | "language_info": { 18 | "codemirror_mode": { 19 | "name": "ipython", 20 | "version": 3 21 | }, 22 | "file_extension": ".py", 23 | "mimetype": "text/x-python", 24 | "name": "python", 25 | "nbconvert_exporter": "python", 26 | "pygments_lexer": "ipython3", 27 | "version": "3.8.5" 28 | } 29 | }, 30 | "nbformat": 4, 31 | "nbformat_minor": 4 32 | } 33 | -------------------------------------------------------------------------------- /sort/requirements.txt: -------------------------------------------------------------------------------- 1 | filterpy==1.4.5 2 | scikit-image==0.17.2 3 | lap==0.4.0 4 | -------------------------------------------------------------------------------- /sort/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | MINOR MODIFICATION FOR ClassySORT: 3 | 4 | The original implementation of SORT threw away 5 | the object classification category information, 6 | for example (0: person, 1: bike, etc.). 7 | 8 | I needed to keep that information for use in `Watchout`, 9 | so I added a `detclass` attribute to the `KalmanBoxTracker` object 10 | which stores YOLO detection object class information. 11 | With this modification, SORT returns each tracked box in the format: 12 | 13 | `[x_left_top, y_left_top, x_right_bottom, y_right_bottom, object_category, u_dot, v_dot, s_dot, object_id]` 14 | 15 | 16 | ========================================================================== 17 | 18 | 19 | SORT: A Simple, Online and Realtime Tracker 20 | Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai 21 | 22 | This program is free software: you can redistribute it and/or modify 23 | it under the terms of the GNU General Public License as published by 24 | the Free Software Foundation, either version 3 of the License, or 25 | (at your option) any later version. 26 | 27 | This program is distributed in the hope that it will be useful, 28 | but WITHOUT ANY WARRANTY; without even the implied warranty of 29 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 | GNU General Public License for more details. 31 | 32 | You should have received a copy of the GNU General Public License 33 | along with this program. If not, see <https://www.gnu.org/licenses/>.
34 | """ 35 | from __future__ import print_function 36 | 37 | import os 38 | import numpy as np 39 | import matplotlib 40 | matplotlib.use('TkAgg') 41 | import matplotlib.pyplot as plt 42 | import matplotlib.patches as patches 43 | from skimage import io 44 | 45 | import glob 46 | import time 47 | import argparse 48 | from filterpy.kalman import KalmanFilter 49 | 50 | np.random.seed(0) 51 | 52 | def linear_assignment(cost_matrix): 53 | try: 54 | import lap #linear assignment problem solver 55 | _, x, y = lap.lapjv(cost_matrix, extend_cost = True) 56 | return np.array([[y[i],i] for i in x if i>=0]) 57 | except ImportError: 58 | from scipy.optimize import linear_sum_assignment 59 | x,y = linear_sum_assignment(cost_matrix) 60 | return np.array(list(zip(x,y))) 61 | 62 | def iou_batch(bb_test, bb_gt): 63 | """ 64 | From SORT: Computes IOU between two boxes in the form [x1,y1,x2,y2] 65 | """ 66 | bb_gt = np.expand_dims(bb_gt, 0) 67 | bb_test = np.expand_dims(bb_test, 1) 68 | 69 | xx1 = np.maximum(bb_test[...,0], bb_gt[..., 0]) 70 | yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) 71 | xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) 72 | yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) 73 | w = np.maximum(0., xx2 - xx1) 74 | h = np.maximum(0., yy2 - yy1) 75 | wh = w * h 76 | o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) 77 | + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) 78 | return(o) 79 | 80 | def convert_bbox_to_z(bbox): 81 | """ 82 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the center of the box and s is the scale/area and r is the aspect ratio 83 | """ 84 | w = bbox[2] - bbox[0] 85 | h = bbox[3] - bbox[1] 86 | x = bbox[0] + w/2. 87 | y = bbox[1] + h/2. 88 | s = w * h #scale is just area 89 | r = w / float(h) 90 | return np.array([x, y, s, r]).reshape((4, 1)) 91 | 92 | def convert_x_to_bbox(x, score=None): 93 | """ 94 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form 95 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right 96 | """ 97 | w = np.sqrt(x[2] * x[3]) 98 | h = x[2] / w 99 | if(score==None): 100 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) 101 | else: 102 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) 103 | 104 | class KalmanBoxTracker(object): 105 | """ 106 | This class represents the internal state of individual tracked objects observed as bbox. 107 | """ 108 | count = 0 109 | def __init__(self, bbox): 110 | """ 111 | Initialize a tracker using initial bounding box 112 | 113 | Parameter 'bbox' must have 'detected class' int number at the -1 position. 114 | """ 115 | self.kf = KalmanFilter(dim_x=7, dim_z=4) 116 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0],[0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) 117 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) 118 | 119 | self.kf.R[2:,2:] *= 10. # R: Covariance matrix of measurement noise (set to high for noisy inputs -> more 'inertia' of boxes') 120 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities 121 | self.kf.P *= 10. 
122 | self.kf.Q[-1,-1] *= 0.5 # Q: Covariance matrix of process noise (set to high for erratically moving things) 123 | self.kf.Q[4:,4:] *= 0.5 124 | 125 | self.kf.x[:4] = convert_bbox_to_z(bbox) # STATE VECTOR 126 | self.time_since_update = 0 127 | self.id = KalmanBoxTracker.count 128 | KalmanBoxTracker.count += 1 129 | self.history = [] 130 | self.hits = 0 131 | self.hit_streak = 0 132 | self.age = 0 133 | 134 | #keep yolov5 detected class information 135 | self.detclass = bbox[5] 136 | 137 | def update(self, bbox): 138 | """ 139 | Updates the state vector with observed bbox 140 | """ 141 | self.time_since_update = 0 142 | self.history = [] 143 | self.hits += 1 144 | self.hit_streak += 1 145 | self.kf.update(convert_bbox_to_z(bbox)) 146 | self.detclass = bbox[5] 147 | 148 | def predict(self): 149 | """ 150 | Advances the state vector and returns the predicted bounding box estimate 151 | """ 152 | if((self.kf.x[6]+self.kf.x[2])<=0): 153 | self.kf.x[6] *= 0.0 154 | self.kf.predict() 155 | self.age += 1 156 | if(self.time_since_update>0): 157 | self.hit_streak = 0 158 | self.time_since_update += 1 159 | self.history.append(convert_x_to_bbox(self.kf.x)) 160 | return self.history[-1] 161 | 162 | def get_state(self): 163 | """ 164 | Returns the current bounding box estimate 165 | # test 166 | arr1 = np.array([[1,2,3,4]]) 167 | arr2 = np.array([0]) 168 | arr3 = np.expand_dims(arr2, 0) 169 | np.concatenate((arr1,arr3), axis=1) 170 | """ 171 | arr_detclass = np.expand_dims(np.array([self.detclass]), 0) 172 | 173 | arr_u_dot = np.expand_dims(self.kf.x[4],0) 174 | arr_v_dot = np.expand_dims(self.kf.x[5],0) 175 | arr_s_dot = np.expand_dims(self.kf.x[6],0) 176 | 177 | return np.concatenate((convert_x_to_bbox(self.kf.x), arr_detclass, arr_u_dot, arr_v_dot, arr_s_dot), axis=1) 178 | 179 | def associate_detections_to_trackers(detections, trackers, iou_threshold = 0.3): 180 | """ 181 | Assigns detections to tracked object (both represented as bounding boxes) 182 | Returns 3 lists of 183 | 1. matches, 184 | 2. unmatched_detections 185 | 3. 
unmatched_trackers 186 | """ 187 | if(len(trackers)==0): 188 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) 189 | 190 | iou_matrix = iou_batch(detections, trackers) 191 | 192 | if min(iou_matrix.shape) > 0: 193 | a = (iou_matrix > iou_threshold).astype(np.int32) 194 | if a.sum(1).max() == 1 and a.sum(0).max() ==1: 195 | matched_indices = np.stack(np.where(a), axis=1) 196 | else: 197 | matched_indices = linear_assignment(-iou_matrix) 198 | else: 199 | matched_indices = np.empty(shape=(0,2)) 200 | 201 | unmatched_detections = [] 202 | for d, det in enumerate(detections): 203 | if(d not in matched_indices[:,0]): 204 | unmatched_detections.append(d) 205 | 206 | unmatched_trackers = [] 207 | for t, trk in enumerate(trackers): 208 | if(t not in matched_indices[:,1]): 209 | unmatched_trackers.append(t) 210 | 211 | #filter out matched with low IOU 212 | matches = [] 213 | for m in matched_indices: 214 | if(iou_matrix[m[0], m[1]]= self.min_hits or self.frame_count <= self.min_hits): 279 | ret.append(np.concatenate((d, [trk.id+1])).reshape(1,-1)) #+1'd because MOT benchmark requires positive value 280 | i -= 1 281 | #remove dead tracklet 282 | if(trk.time_since_update >self.max_age): 283 | self.trackers.pop(i) 284 | if(len(ret) > 0): 285 | return np.concatenate(ret) 286 | return np.empty((0,6)) 287 | 288 | def parse_args(): 289 | """Parse input arguments.""" 290 | parser = argparse.ArgumentParser(description='SORT demo') 291 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true') 292 | parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data') 293 | parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train') 294 | parser.add_argument("--max_age", 295 | help="Maximum number of frames to keep alive a track without associated detections.", 296 | type=int, default=1) 297 | parser.add_argument("--min_hits", 298 | help="Minimum number of associated detections before track is initialised.", 299 | type=int, default=3) 300 | parser.add_argument("--iou_threshold", help="Minimum IOU for match.", type=float, default=0.3) 301 | args = parser.parse_args() 302 | return args 303 | 304 | if __name__ == '__main__': 305 | # all train 306 | args = parse_args() 307 | display = args.display 308 | phase = args.phase 309 | total_time = 0.0 310 | total_frames = 0 311 | colours = np.random.rand(32, 3) #used only for display 312 | if(display): 313 | if not os.path.exists('mot_benchmark'): 314 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). 
E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n') 315 | exit() 316 | plt.ion() 317 | fig = plt.figure() 318 | ax1 = fig.add_subplot(111, aspect='equal') 319 | 320 | if not os.path.exists('output'): 321 | os.makedirs('output') 322 | pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt') 323 | for seq_dets_fn in glob.glob(pattern): 324 | mot_tracker = Sort(max_age=args.max_age, 325 | min_hits=args.min_hits, 326 | iou_threshold=args.iou_threshold) #create instance of the SORT tracker 327 | seq_dets = np.loadtxt(seq_dets_fn, delimiter=',') 328 | seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0] 329 | 330 | with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file: 331 | print("Processing %s."%(seq)) 332 | for frame in range(int(seq_dets[:,0].max())): 333 | frame += 1 #detection and frame numbers begin at 1 334 | dets = seq_dets[seq_dets[:, 0]==frame, 2:7] 335 | dets[:, 2:4] += dets[:, 0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2] 336 | total_frames += 1 337 | 338 | if(display): 339 | fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame)) 340 | im =io.imread(fn) 341 | ax1.imshow(im) 342 | plt.title(seq + ' Tracked Targets') 343 | 344 | start_time = time.time() 345 | trackers = mot_tracker.update(dets) 346 | cycle_time = time.time() - start_time 347 | total_time += cycle_time 348 | 349 | for d in trackers: 350 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file) 351 | if(display): 352 | d = d.astype(np.int32) 353 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:])) 354 | 355 | if(display): 356 | fig.canvas.flush_events() 357 | plt.draw() 358 | ax1.cla() 359 | 360 | print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time)) 361 | 362 | if(display): 363 | print("Note: to get real runtime results run without the option: --display") 364 | -------------------------------------------------------------------------------- /yolov5/.dockerignore: -------------------------------------------------------------------------------- 1 | # Repo-specific DockerIgnore ------------------------------------------------------------------------------------------- 2 | #.git 3 | .cache 4 | .idea 5 | runs 6 | output 7 | coco 8 | storage.googleapis.com 9 | 10 | data/samples/* 11 | **/results*.txt 12 | *.jpg 13 | 14 | # Neural Network weights ----------------------------------------------------------------------------------------------- 15 | **/*.weights 16 | **/*.pt 17 | **/*.pth 18 | **/*.onnx 19 | **/*.mlmodel 20 | **/*.torchscript 21 | 22 | 23 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 24 | # Below Copied From .gitignore ----------------------------------------------------------------------------------------- 25 | 26 | 27 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 28 | # Byte-compiled / optimized / DLL files 29 | __pycache__/ 30 | *.py[cod] 31 | *$py.class 32 | 33 | # C extensions 34 | *.so 35 | 36 | # Distribution / packaging 37 | .Python 38 | env/ 39 | build/ 40 | develop-eggs/ 41 | dist/ 42 | downloads/ 43 | eggs/ 44 | .eggs/ 45 | lib/ 46 | lib64/ 47 | parts/ 48 | sdist/ 49 | var/ 50 | wheels/ 51 | *.egg-info/ 52 | wandb/ 53 | .installed.cfg 54 | *.egg 55 | 56 | # PyInstaller 57 | # Usually these files are written by a 
python script from a template 58 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 59 | *.manifest 60 | *.spec 61 | 62 | # Installer logs 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | 66 | # Unit test / coverage reports 67 | htmlcov/ 68 | .tox/ 69 | .coverage 70 | .coverage.* 71 | .cache 72 | nosetests.xml 73 | coverage.xml 74 | *.cover 75 | .hypothesis/ 76 | 77 | # Translations 78 | *.mo 79 | *.pot 80 | 81 | # Django stuff: 82 | *.log 83 | local_settings.py 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # pyenv 102 | .python-version 103 | 104 | # celery beat schedule file 105 | celerybeat-schedule 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # dotenv 111 | .env 112 | 113 | # virtualenv 114 | .venv* 115 | venv*/ 116 | ENV*/ 117 | 118 | # Spyder project settings 119 | .spyderproject 120 | .spyproject 121 | 122 | # Rope project settings 123 | .ropeproject 124 | 125 | # mkdocs documentation 126 | /site 127 | 128 | # mypy 129 | .mypy_cache/ 130 | 131 | 132 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 133 | 134 | # General 135 | .DS_Store 136 | .AppleDouble 137 | .LSOverride 138 | 139 | # Icon must end with two \r 140 | Icon 141 | Icon? 142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | 162 | 163 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 164 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 165 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 166 | 167 | # User-specific stuff: 168 | .idea/* 169 | .idea/**/workspace.xml 170 | .idea/**/tasks.xml 171 | .idea/dictionaries 172 | .html # Bokeh Plots 173 | .pg # TensorFlow Frozen Graphs 174 | .avi # videos 175 | 176 | # Sensitive or high-churn files: 177 | .idea/**/dataSources/ 178 | .idea/**/dataSources.ids 179 | .idea/**/dataSources.local.xml 180 | .idea/**/sqlDataSources.xml 181 | .idea/**/dynamic.xml 182 | .idea/**/uiDesigner.xml 183 | 184 | # Gradle: 185 | .idea/**/gradle.xml 186 | .idea/**/libraries 187 | 188 | # CMake 189 | cmake-build-debug/ 190 | cmake-build-release/ 191 | 192 | # Mongo Explorer plugin: 193 | .idea/**/mongoSettings.xml 194 | 195 | ## File-based project format: 196 | *.iws 197 | 198 | ## Plugin-specific files: 199 | 200 | # IntelliJ 201 | out/ 202 | 203 | # mpeltonen/sbt-idea plugin 204 | .idea_modules/ 205 | 206 | # JIRA plugin 207 | atlassian-ide-plugin.xml 208 | 209 | # Cursive Clojure plugin 210 | .idea/replstate.xml 211 | 212 | # Crashlytics plugin (for Android Studio and IntelliJ) 213 | com_crashlytics_export_strings.xml 214 | crashlytics.properties 215 | crashlytics-build.properties 216 | fabric.properties 217 | -------------------------------------------------------------------------------- /yolov5/.gitattributes: 
-------------------------------------------------------------------------------- 1 | # this drop notebooks from GitHub language stats 2 | *.ipynb linguist-vendored 3 | -------------------------------------------------------------------------------- /yolov5/.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🐛 Bug report" 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | Before submitting a bug report, please be aware that your issue **must be reproducible** with all of the following, otherwise it is non-actionable, and we can not help you: 11 | - **Current repo**: run `git fetch && git status -uno` to check and `git pull` to update repo 12 | - **Common dataset**: coco.yaml or coco128.yaml 13 | - **Common environment**: Colab, Google Cloud, or Docker image. See https://github.com/ultralytics/yolov5#environments 14 | 15 | If this is a custom dataset/training question you **must include** your `train*.jpg`, `test*.jpg` and `results.png` figures, or we can not help you. You can generate these with `utils.plot_results()`. 16 | 17 | 18 | ## 🐛 Bug 19 | A clear and concise description of what the bug is. 20 | 21 | 22 | ## To Reproduce (REQUIRED) 23 | 24 | Input: 25 | ``` 26 | import torch 27 | 28 | a = torch.tensor([5]) 29 | c = a / 0 30 | ``` 31 | 32 | Output: 33 | ``` 34 | Traceback (most recent call last): 35 | File "/Users/glennjocher/opt/anaconda3/envs/env1/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code 36 | exec(code_obj, self.user_global_ns, self.user_ns) 37 | File "", line 5, in 38 | c = a / 0 39 | RuntimeError: ZeroDivisionError 40 | ``` 41 | 42 | 43 | ## Expected behavior 44 | A clear and concise description of what you expected to happen. 45 | 46 | 47 | ## Environment 48 | If applicable, add screenshots to help explain your problem. 49 | 50 | - OS: [e.g. Ubuntu] 51 | - GPU [e.g. 2080 Ti] 52 | 53 | 54 | ## Additional context 55 | Add any other context about the problem here. 
56 | -------------------------------------------------------------------------------- /yolov5/.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "🚀 Feature request" 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## 🚀 Feature 11 | 12 | 13 | ## Motivation 14 | 15 | 16 | 17 | ## Pitch 18 | 19 | 20 | 21 | ## Alternatives 22 | 23 | 24 | 25 | ## Additional context 26 | 27 | 28 | -------------------------------------------------------------------------------- /yolov5/.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "❓Question" 3 | about: Ask a general question 4 | title: '' 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## ❔Question 11 | 12 | 13 | ## Additional context 14 | -------------------------------------------------------------------------------- /yolov5/.github/workflows/ci-testing.yml: -------------------------------------------------------------------------------- 1 | name: CI CPU testing 2 | 3 | on: # https://help.github.com/en/actions/reference/events-that-trigger-workflows 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | # The branches below must be a subset of the branches above 8 | branches: [ master ] 9 | schedule: 10 | - cron: '0 0 * * *' # Runs at 00:00 UTC every day 11 | 12 | jobs: 13 | cpu-tests: 14 | 15 | runs-on: ${{ matrix.os }} 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | os: [ubuntu-latest, macos-latest, windows-latest] 20 | python-version: [3.8] 21 | model: ['yolov5s'] # models to test 22 | 23 | # Timeout: https://stackoverflow.com/a/59076067/4521646 24 | timeout-minutes: 50 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v2 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | # Note: This uses an internal pip API and may not always work 33 | # https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow 34 | - name: Get pip cache 35 | id: pip-cache 36 | run: | 37 | python -c "from pip._internal.locations import USER_CACHE_DIR; print('::set-output name=dir::' + USER_CACHE_DIR)" 38 | 39 | - name: Cache pip 40 | uses: actions/cache@v1 41 | with: 42 | path: ${{ steps.pip-cache.outputs.dir }} 43 | key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }} 44 | restore-keys: | 45 | ${{ runner.os }}-${{ matrix.python-version }}-pip- 46 | 47 | - name: Install dependencies 48 | run: | 49 | python -m pip install --upgrade pip 50 | pip install -qr requirements.txt -f https://download.pytorch.org/whl/cpu/torch_stable.html 51 | pip install -q onnx 52 | python --version 53 | pip --version 54 | pip list 55 | shell: bash 56 | 57 | - name: Download data 58 | run: | 59 | # curl -L -o tmp.zip https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 60 | # unzip -q tmp.zip -d ../ 61 | # rm tmp.zip 62 | 63 | - name: Tests workflow 64 | run: | 65 | # export PYTHONPATH="$PWD" # to run '$ python *.py' files in subdirectories 66 | di=cpu # inference devices # define device 67 | 68 | # train 69 | python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di 70 | # detect 71 | python detect.py --weights weights/${{ matrix.model }}.pt --device $di 72 | python detect.py 
--weights runs/train/exp/weights/last.pt --device $di 73 | # test 74 | python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di 75 | python test.py --img 256 --batch 8 --weights runs/train/exp/weights/last.pt --device $di 76 | 77 | python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect 78 | python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export 79 | shell: bash 80 | -------------------------------------------------------------------------------- /yolov5/.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # This action runs GitHub's industry-leading static analysis engine, CodeQL, against a repository's source code to find security vulnerabilities. 2 | # https://github.com/github/codeql-action 3 | 4 | name: "CodeQL" 5 | 6 | on: 7 | schedule: 8 | - cron: '0 0 1 * *' # Runs at 00:00 UTC on the 1st of every month 9 | 10 | jobs: 11 | analyze: 12 | name: Analyze 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | language: [ 'python' ] 19 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 20 | # Learn more: 21 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 22 | 23 | steps: 24 | - name: Checkout repository 25 | uses: actions/checkout@v2 26 | 27 | # Initializes the CodeQL tools for scanning. 28 | - name: Initialize CodeQL 29 | uses: github/codeql-action/init@v1 30 | with: 31 | languages: ${{ matrix.language }} 32 | # If you wish to specify custom queries, you can do so here or in a config file. 33 | # By default, queries listed here will override any specified in a config file. 34 | # Prefix the list here with "+" to use these queries and those in the config file. 35 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 36 | 37 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 38 | # If this step fails, then you should remove it and run the build manually (see below) 39 | - name: Autobuild 40 | uses: github/codeql-action/autobuild@v1 41 | 42 | # ℹ️ Command-line programs to run using the OS shell. 43 | # 📚 https://git.io/JvXDl 44 | 45 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 46 | # and modify them (or add more) to build your code if your project 47 | # uses a compiled language 48 | 49 | #- run: | 50 | # make bootstrap 51 | # make release 52 | 53 | - name: Perform CodeQL Analysis 54 | uses: github/codeql-action/analyze@v1 55 | -------------------------------------------------------------------------------- /yolov5/.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request_target, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/first-interaction@v1 10 | with: 11 | repo-token: ${{ secrets.GITHUB_TOKEN }} 12 | pr-message: | 13 | Hello @${{ github.actor }}, thank you for submitting a PR! 
To allow your work to be integrated as seamlessly as possible, we advise you to: 14 | - Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch: 15 | ```bash 16 | git remote add upstream https://github.com/ultralytics/yolov5.git 17 | git fetch upstream 18 | git checkout feature # <----- replace 'feature' with local branch name 19 | git rebase upstream/master 20 | git push -u origin -f 21 | ``` 22 | - Verify all Continuous Integration (CI) **checks are passing**. 23 | - Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee 24 | 25 | issue-message: | 26 | Hello @${{ github.actor }}, thank you for your interest in 🚀 YOLOv5! Please visit our ⭐️ [Tutorials](https://github.com/ultralytics/yolov5/wiki#tutorials) to get started, where you can find quickstart guides for simple tasks like [Custom Data Training](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) all the way to advanced concepts like [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607). 27 | 28 | If this is a 🐛 Bug Report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you. 29 | 30 | If this is a custom training ❓ Question, please provide as much information as possible, including dataset images, training logs, screenshots, and a public link to online [W&B logging](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data#visualize) if available. 31 | 32 | For business inquiries or professional support requests please visit https://www.ultralytics.com or email Glenn Jocher at glenn.jocher@ultralytics.com. 33 | 34 | ## Requirements 35 | 36 | Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. To install run: 37 | ```bash 38 | $ pip install -r requirements.txt 39 | ``` 40 | 41 | ## Environments 42 | 43 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 44 | 45 | - **Google Colab Notebook** with free GPU: Open In Colab 46 | - **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5) 47 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 48 | - **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker) 49 | 50 | ## Status 51 | 52 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 53 | 54 | If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. 
CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([test.py](https://github.com/ultralytics/yolov5/blob/master/test.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/models/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit. 55 | 56 | -------------------------------------------------------------------------------- /yolov5/.github/workflows/rebase.yml: -------------------------------------------------------------------------------- 1 | name: Automatic Rebase 2 | # https://github.com/marketplace/actions/automatic-rebase 3 | 4 | on: 5 | issue_comment: 6 | types: [created] 7 | 8 | jobs: 9 | rebase: 10 | name: Rebase 11 | if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/rebase') 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout the latest code 15 | uses: actions/checkout@v2 16 | with: 17 | fetch-depth: 0 18 | - name: Automatic Rebase 19 | uses: cirrus-actions/rebase@1.3.1 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /yolov5/.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close stale issues 2 | on: 3 | schedule: 4 | - cron: "0 0 * * *" 5 | 6 | jobs: 7 | stale: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/stale@v1 11 | with: 12 | repo-token: ${{ secrets.GITHUB_TOKEN }} 13 | stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 14 | stale-pr-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.' 15 | days-before-stale: 30 16 | days-before-close: 5 17 | exempt-issue-labels: 'documentation,tutorial' 18 | operations-per-run: 100 # The maximum number of operations per run, used to control rate limiting. 
19 | -------------------------------------------------------------------------------- /yolov5/.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.jpeg 4 | *.png 5 | *.bmp 6 | *.tif 7 | *.tiff 8 | *.heic 9 | *.JPG 10 | *.JPEG 11 | *.PNG 12 | *.BMP 13 | *.TIF 14 | *.TIFF 15 | *.HEIC 16 | *.mp4 17 | *.mov 18 | *.MOV 19 | *.avi 20 | *.data 21 | *.json 22 | 23 | *.cfg 24 | !cfg/yolov3*.cfg 25 | 26 | storage.googleapis.com 27 | runs/* 28 | data/* 29 | !data/images/zidane.jpg 30 | !data/images/bus.jpg 31 | !data/coco.names 32 | !data/coco_paper.names 33 | !data/coco.data 34 | !data/coco_*.data 35 | !data/coco_*.txt 36 | !data/trainvalno5k.shapes 37 | !data/*.sh 38 | 39 | pycocotools/* 40 | results*.txt 41 | gcp_test*.sh 42 | 43 | # Datasets ------------------------------------------------------------------------------------------------------------- 44 | coco/ 45 | coco128/ 46 | VOC/ 47 | 48 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 49 | *.m~ 50 | *.mat 51 | !targets*.mat 52 | 53 | # Neural Network weights ----------------------------------------------------------------------------------------------- 54 | *.weights 55 | *.pt 56 | *.onnx 57 | *.mlmodel 58 | *.torchscript 59 | darknet53.conv.74 60 | yolov3-tiny.conv.15 61 | 62 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 63 | # Byte-compiled / optimized / DLL files 64 | __pycache__/ 65 | *.py[cod] 66 | *$py.class 67 | 68 | # C extensions 69 | *.so 70 | 71 | # Distribution / packaging 72 | .Python 73 | env/ 74 | build/ 75 | develop-eggs/ 76 | dist/ 77 | downloads/ 78 | eggs/ 79 | .eggs/ 80 | lib/ 81 | lib64/ 82 | parts/ 83 | sdist/ 84 | var/ 85 | wheels/ 86 | *.egg-info/ 87 | wandb/ 88 | .installed.cfg 89 | *.egg 90 | 91 | 92 | # PyInstaller 93 | # Usually these files are written by a python script from a template 94 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
95 | *.manifest 96 | *.spec 97 | 98 | # Installer logs 99 | pip-log.txt 100 | pip-delete-this-directory.txt 101 | 102 | # Unit test / coverage reports 103 | htmlcov/ 104 | .tox/ 105 | .coverage 106 | .coverage.* 107 | .cache 108 | nosetests.xml 109 | coverage.xml 110 | *.cover 111 | .hypothesis/ 112 | 113 | # Translations 114 | *.mo 115 | *.pot 116 | 117 | # Django stuff: 118 | *.log 119 | local_settings.py 120 | 121 | # Flask stuff: 122 | instance/ 123 | .webassets-cache 124 | 125 | # Scrapy stuff: 126 | .scrapy 127 | 128 | # Sphinx documentation 129 | docs/_build/ 130 | 131 | # PyBuilder 132 | target/ 133 | 134 | # Jupyter Notebook 135 | .ipynb_checkpoints 136 | 137 | # pyenv 138 | .python-version 139 | 140 | # celery beat schedule file 141 | celerybeat-schedule 142 | 143 | # SageMath parsed files 144 | *.sage.py 145 | 146 | # dotenv 147 | .env 148 | 149 | # virtualenv 150 | .venv* 151 | venv*/ 152 | ENV*/ 153 | 154 | # Spyder project settings 155 | .spyderproject 156 | .spyproject 157 | 158 | # Rope project settings 159 | .ropeproject 160 | 161 | # mkdocs documentation 162 | /site 163 | 164 | # mypy 165 | .mypy_cache/ 166 | 167 | 168 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 169 | 170 | # General 171 | .DS_Store 172 | .AppleDouble 173 | .LSOverride 174 | 175 | # Icon must end with two \r 176 | Icon 177 | Icon? 178 | 179 | # Thumbnails 180 | ._* 181 | 182 | # Files that might appear in the root of a volume 183 | .DocumentRevisions-V100 184 | .fseventsd 185 | .Spotlight-V100 186 | .TemporaryItems 187 | .Trashes 188 | .VolumeIcon.icns 189 | .com.apple.timemachine.donotpresent 190 | 191 | # Directories potentially created on remote AFP share 192 | .AppleDB 193 | .AppleDesktop 194 | Network Trash Folder 195 | Temporary Items 196 | .apdisk 197 | 198 | 199 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 200 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 201 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 202 | 203 | # User-specific stuff: 204 | .idea/* 205 | .idea/**/workspace.xml 206 | .idea/**/tasks.xml 207 | .idea/dictionaries 208 | .html # Bokeh Plots 209 | .pg # TensorFlow Frozen Graphs 210 | .avi # videos 211 | 212 | # Sensitive or high-churn files: 213 | .idea/**/dataSources/ 214 | .idea/**/dataSources.ids 215 | .idea/**/dataSources.local.xml 216 | .idea/**/sqlDataSources.xml 217 | .idea/**/dynamic.xml 218 | .idea/**/uiDesigner.xml 219 | 220 | # Gradle: 221 | .idea/**/gradle.xml 222 | .idea/**/libraries 223 | 224 | # CMake 225 | cmake-build-debug/ 226 | cmake-build-release/ 227 | 228 | # Mongo Explorer plugin: 229 | .idea/**/mongoSettings.xml 230 | 231 | ## File-based project format: 232 | *.iws 233 | 234 | ## Plugin-specific files: 235 | 236 | # IntelliJ 237 | out/ 238 | 239 | # mpeltonen/sbt-idea plugin 240 | .idea_modules/ 241 | 242 | # JIRA plugin 243 | atlassian-ide-plugin.xml 244 | 245 | # Cursive Clojure plugin 246 | .idea/replstate.xml 247 | 248 | # Crashlytics plugin (for Android Studio and IntelliJ) 249 | com_crashlytics_export_strings.xml 250 | crashlytics.properties 251 | crashlytics-build.properties 252 | fabric.properties 253 | -------------------------------------------------------------------------------- /yolov5/Dockerfile: -------------------------------------------------------------------------------- 1 | # Start FROM Nvidia PyTorch image 
https://ngc.nvidia.com/catalog/containers/nvidia:pytorch 2 | FROM nvcr.io/nvidia/pytorch:20.12-py3 3 | 4 | # Install linux packages 5 | RUN apt update && apt install -y screen libgl1-mesa-glx 6 | 7 | # Install python dependencies 8 | RUN pip install --upgrade pip 9 | COPY requirements.txt . 10 | RUN pip install -r requirements.txt 11 | RUN pip install gsutil 12 | 13 | # Create working directory 14 | RUN mkdir -p /usr/src/app 15 | WORKDIR /usr/src/app 16 | 17 | # Copy contents 18 | COPY . /usr/src/app 19 | 20 | # Copy weights 21 | #RUN python3 -c "from models import *; \ 22 | #attempt_download('weights/yolov5s.pt'); \ 23 | #attempt_download('weights/yolov5m.pt'); \ 24 | #attempt_download('weights/yolov5l.pt')" 25 | 26 | 27 | # --------------------------------------------------- Extras Below --------------------------------------------------- 28 | 29 | # Build and Push 30 | # t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t 31 | # for v in {300..303}; do t=ultralytics/coco:v$v && sudo docker build -t $t . && sudo docker push $t; done 32 | 33 | # Pull and Run 34 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all $t 35 | 36 | # Pull and Run with local directory access 37 | # t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t 38 | 39 | # Kill all 40 | # sudo docker kill $(sudo docker ps -q) 41 | 42 | # Kill all image-based 43 | # sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest) 44 | 45 | # Bash into running container 46 | # sudo docker container exec -it ba65811811ab bash 47 | 48 | # Bash into stopped container 49 | # sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume 50 | 51 | # Send weights to GCP 52 | # python -c "from utils.general import *; strip_optimizer('runs/train/exp0_*/weights/best.pt', 'tmp.pt')" && gsutil cp tmp.pt gs://*.pt 53 | 54 | # Clean up 55 | # docker system prune -a --volumes 56 | -------------------------------------------------------------------------------- /yolov5/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 |   4 | 5 | ![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg) 6 | 7 | This repository represents Ultralytics open-source research into future object detection methods, and incorporates our lessons learned and best practices evolved over training thousands of models on custom client datasets with our previous YOLO repository https://github.com/ultralytics/yolov3. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk. 8 | 9 | ** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 32, and includes image preprocessing, PyTorch FP16 inference, postprocessing and NMS. EfficientDet data from [google/automl](https://github.com/google/automl) at batch size 8. 10 | 11 | - **August 13, 2020**: [v3.0 release](https://github.com/ultralytics/yolov5/releases/tag/v3.0): nn.Hardswish() activations, data autodownload, native AMP. 12 | - **July 23, 2020**: [v2.0 release](https://github.com/ultralytics/yolov5/releases/tag/v2.0): improved model definition, training and mAP. 
13 | - **June 22, 2020**: [PANet](https://arxiv.org/abs/1803.01534) updates: new heads, reduced parameters, improved speed and mAP [364fcfd](https://github.com/ultralytics/yolov5/commit/364fcfd7dba53f46edd4f04c037a039c0a287972). 14 | - **June 19, 2020**: [FP16](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.half) as new default for smaller checkpoints and faster inference [d4c6674](https://github.com/ultralytics/yolov5/commit/d4c6674c98e19df4c40e33a777610a18d1961145). 15 | - **June 9, 2020**: [CSP](https://github.com/WongKinYiu/CrossStagePartialNetworks) updates: improved speed, size, and accuracy (credit to @WongKinYiu for CSP). 16 | - **May 27, 2020**: Public release. YOLOv5 models are SOTA among all known YOLO implementations. 17 | 18 | 19 | ## Pretrained Checkpoints 20 | 21 | | Model | APval | APtest | AP50 | SpeedGPU | FPSGPU || params | GFLOPS | 22 | |---------- |------ |------ |------ | -------- | ------| ------ |------ | :------: | 23 | | [YOLOv5s](https://github.com/ultralytics/yolov5/releases) | 37.0 | 37.0 | 56.2 | **2.4ms** | **416** || 7.5M | 17.5 24 | | [YOLOv5m](https://github.com/ultralytics/yolov5/releases) | 44.3 | 44.3 | 63.2 | 3.4ms | 294 || 21.8M | 52.3 25 | | [YOLOv5l](https://github.com/ultralytics/yolov5/releases) | 47.7 | 47.7 | 66.5 | 4.4ms | 227 || 47.8M | 117.2 26 | | [YOLOv5x](https://github.com/ultralytics/yolov5/releases) | **49.2** | **49.2** | **67.7** | 6.9ms | 145 || 89.0M | 221.5 27 | | | | | | | || | 28 | | [YOLOv5x](https://github.com/ultralytics/yolov5/releases) + TTA|**50.8**| **50.8** | **68.9** | 25.5ms | 39 || 89.0M | 801.0 29 | 30 | ** APtest denotes COCO [test-dev2017](http://cocodataset.org/#upload) server results, all other AP results denote val2017 accuracy. 31 | ** All AP numbers are for single-model single-scale without ensemble or TTA. **Reproduce mAP** by `python test.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65` 32 | ** SpeedGPU averaged over 5000 COCO val2017 images using a GCP [n1-standard-16](https://cloud.google.com/compute/docs/machine-types#n1_standard_machine_types) V100 instance, and includes image preprocessing, FP16 inference, postprocessing and NMS. NMS is 1-2ms/img. **Reproduce speed** by `python test.py --data coco.yaml --img 640 --conf 0.25 --iou 0.45` 33 | ** All checkpoints are trained to 300 epochs with default settings and hyperparameters (no autoaugmentation). 34 | ** Test Time Augmentation ([TTA](https://github.com/ultralytics/yolov5/issues/303)) runs at 3 image sizes. **Reproduce TTA** by `python test.py --data coco.yaml --img 832 --iou 0.65 --augment` 35 | 36 | ## Requirements 37 | 38 | Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. 
To install run: 39 | ```bash 40 | $ pip install -r requirements.txt 41 | ``` 42 | 43 | 44 | ## Tutorials 45 | 46 | * [Train Custom Data](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data)  🚀 RECOMMENDED 47 | * [Weights & Biases Logging](https://github.com/ultralytics/yolov5/issues/1289)  🌟 NEW 48 | * [Multi-GPU Training](https://github.com/ultralytics/yolov5/issues/475) 49 | * [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36)  ⭐ NEW 50 | * [ONNX and TorchScript Export](https://github.com/ultralytics/yolov5/issues/251) 51 | * [Test-Time Augmentation (TTA)](https://github.com/ultralytics/yolov5/issues/303) 52 | * [Model Ensembling](https://github.com/ultralytics/yolov5/issues/318) 53 | * [Model Pruning/Sparsity](https://github.com/ultralytics/yolov5/issues/304) 54 | * [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607) 55 | * [Transfer Learning with Frozen Layers](https://github.com/ultralytics/yolov5/issues/1314)  ⭐ NEW 56 | * [TensorRT Deployment](https://github.com/wang-xinyu/tensorrtx) 57 | 58 | 59 | ## Environments 60 | 61 | YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled): 62 | 63 | - **Google Colab Notebook** with free GPU: Open In Colab 64 | - **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5) 65 | - **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) 66 | - **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker) 67 | 68 | 69 | ## Inference 70 | 71 | detect.py runs inference on a variety of sources, downloading models automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases) and saving results to `runs/detect`. 72 | ```bash 73 | $ python detect.py --source 0 # webcam 74 | file.jpg # image 75 | file.mp4 # video 76 | path/ # directory 77 | path/*.jpg # glob 78 | rtsp://170.93.143.139/rtplive/470011e600ef003a004ee33696235daa # rtsp stream 79 | rtmp://192.168.1.105/live/test # rtmp stream 80 | http://112.50.243.8/PLTV/88888888/224/3221225900/1.m3u8 # http stream 81 | ``` 82 | 83 | To run inference on example images in `data/images`: 84 | ```bash 85 | $ python detect.py --source data/images --weights yolov5s.pt --conf 0.25 86 | 87 | Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', img_size=640, iou_thres=0.45, save_conf=False, save_dir='runs/detect', save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt']) 88 | Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB) 89 | 90 | Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt to yolov5s.pt... 100%|██████████████| 14.5M/14.5M [00:00<00:00, 21.3MB/s] 91 | 92 | Fusing layers... 93 | Model Summary: 232 layers, 7459581 parameters, 0 gradients 94 | image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s) 95 | image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s) 96 | Results saved to runs/detect/exp 97 | Done. 
(0.113s) 98 | ``` 99 | 100 | 101 | ### PyTorch Hub 102 | 103 | To run **batched inference** with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36): 104 | ```python 105 | import torch 106 | from PIL import Image 107 | 108 | # Model 109 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) # for PIL/cv2/np inputs and NMS 110 | 111 | # Images 112 | img1 = Image.open('zidane.jpg') 113 | img2 = Image.open('bus.jpg') 114 | imgs = [img1, img2] # batched list of images 115 | 116 | # Inference 117 | prediction = model(imgs, size=640) # includes NMS 118 | ``` 119 | 120 | 121 | ## Training 122 | 123 | Download [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) and run command below. Training times for YOLOv5s/m/l/x are 2/4/6/8 days on a single V100 (multi-GPU times faster). Use the largest `--batch-size` your GPU allows (batch sizes shown for 16 GB devices). 124 | ```bash 125 | $ python train.py --data coco.yaml --cfg yolov5s.yaml --weights '' --batch-size 64 126 | yolov5m 40 127 | yolov5l 24 128 | yolov5x 16 129 | ``` 130 | 131 | 132 | 133 | ## Citation 134 | 135 | [![DOI](https://zenodo.org/badge/264818686.svg)](https://zenodo.org/badge/latestdoi/264818686) 136 | 137 | 138 | ## About Us 139 | 140 | Ultralytics is a U.S.-based particle physics and AI startup with over 6 years of expertise supporting government, academic and business clients. We offer a wide range of vision AI services, spanning from simple expert advice up to delivery of fully customized, end-to-end production solutions, including: 141 | - **Cloud-based AI** systems operating on **hundreds of HD video streams in realtime.** 142 | - **Edge AI** integrated into custom iOS and Android apps for realtime **30 FPS video inference.** 143 | - **Custom data training**, hyperparameter evolution, and model exportation to any destination. 144 | 145 | For business inquiries and professional support requests please visit us at https://www.ultralytics.com. 146 | 147 | 148 | ## Contact 149 | 150 | **Issues should be raised directly in the repository.** For business inquiries or professional support requests please visit https://www.ultralytics.com or email Glenn Jocher at glenn.jocher@ultralytics.com. 
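
Returning to the PyTorch Hub example above: the call `model(imgs, size=640)` returns a `Detections` object (defined in `models/common.py`, included further below in this repository). The snippet below is a brief sketch added for illustration, not part of the upstream README; attribute and method names follow that `Detections` class, and the image paths are the samples bundled in `data/images/`.

```python
import torch
from PIL import Image

# Same Hub call as the example above; downloads yolov5s weights on first use
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

imgs = [Image.open('data/images/zidane.jpg'), Image.open('data/images/bus.jpg')]
results = model(imgs, size=640)  # autoShape forward -> Detections object (NMS already applied)

results.print()  # per-image summary string, e.g. '2 persons, 2 ties'
results.save()   # writes annotated results0.jpg, results1.jpg, ...

# results.xyxy holds one tensor per image; each row is [x1, y1, x2, y2, confidence, class]
for i, det in enumerate(results.xyxy):
    if det is None:  # this version may return None for images with no detections
        continue
    for *box, conf, cls in det.tolist():
        print(f'image {i}: class {int(cls)} ({conf:.2f}) at {[round(v, 1) for v in box]}')
```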
151 | -------------------------------------------------------------------------------- /yolov5/detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | from pathlib import Path 4 | 5 | import cv2 6 | import torch 7 | import torch.backends.cudnn as cudnn 8 | from numpy import random 9 | 10 | from models.experimental import attempt_load 11 | from utils.datasets import LoadStreams, LoadImages 12 | from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \ 13 | strip_optimizer, set_logging, increment_path 14 | from utils.plots import plot_one_box 15 | from utils.torch_utils import select_device, load_classifier, time_synchronized 16 | 17 | 18 | def detect(save_img=False): 19 | source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size 20 | webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( 21 | ('rtsp://', 'rtmp://', 'http://')) 22 | 23 | # Directories 24 | save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run 25 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 26 | 27 | # Initialize 28 | set_logging() 29 | device = select_device(opt.device) 30 | half = device.type != 'cpu' # half precision only supported on CUDA 31 | 32 | # Load model 33 | model = attempt_load(weights, map_location=device) # load FP32 model 34 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size 35 | if half: 36 | model.half() # to FP16 37 | 38 | # Second-stage classifier 39 | classify = False 40 | if classify: 41 | modelc = load_classifier(name='resnet101', n=2) # initialize 42 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() 43 | 44 | # Set Dataloader 45 | vid_path, vid_writer = None, None 46 | if webcam: 47 | view_img = True 48 | cudnn.benchmark = True # set True to speed up constant image size inference 49 | dataset = LoadStreams(source, img_size=imgsz) 50 | else: 51 | save_img = True 52 | dataset = LoadImages(source, img_size=imgsz) 53 | 54 | # Get names and colors 55 | names = model.module.names if hasattr(model, 'module') else model.names 56 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] 57 | 58 | # Run inference 59 | t0 = time.time() 60 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 61 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 62 | for path, img, im0s, vid_cap in dataset: 63 | img = torch.from_numpy(img).to(device) 64 | img = img.half() if half else img.float() # uint8 to fp16/32 65 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 66 | if img.ndimension() == 3: 67 | img = img.unsqueeze(0) 68 | 69 | # Inference 70 | t1 = time_synchronized() 71 | pred = model(img, augment=opt.augment)[0] 72 | 73 | # Apply NMS 74 | pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) 75 | t2 = time_synchronized() 76 | 77 | # Apply Classifier 78 | if classify: 79 | pred = apply_classifier(pred, modelc, img, im0s) 80 | 81 | # Process detections 82 | for i, det in enumerate(pred): # detections per image 83 | if webcam: # batch_size >= 1 84 | p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count 85 | else: 86 | p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) 87 | 88 | p = Path(p) # to Path 89 | 
save_path = str(save_dir / p.name) # img.jpg 90 | txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt 91 | s += '%gx%g ' % img.shape[2:] # print string 92 | gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh 93 | if len(det): 94 | # Rescale boxes from img_size to im0 size 95 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 96 | 97 | # Print results 98 | for c in det[:, -1].unique(): 99 | n = (det[:, -1] == c).sum() # detections per class 100 | s += f'{n} {names[int(c)]}s, ' # add to string 101 | 102 | # Write results 103 | for *xyxy, conf, cls in reversed(det): 104 | if save_txt: # Write to file 105 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 106 | line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format 107 | with open(txt_path + '.txt', 'a') as f: 108 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 109 | 110 | if save_img or view_img: # Add bbox to image 111 | label = f'{names[int(cls)]} {conf:.2f}' 112 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) 113 | 114 | # Print time (inference + NMS) 115 | print(f'{s}Done. ({t2 - t1:.3f}s)') 116 | 117 | # Stream results 118 | if view_img: 119 | cv2.imshow(str(p), im0) 120 | if cv2.waitKey(1) == ord('q'): # q to quit 121 | raise StopIteration 122 | 123 | # Save results (image with detections) 124 | if save_img: 125 | if dataset.mode == 'image': 126 | cv2.imwrite(save_path, im0) 127 | else: # 'video' 128 | if vid_path != save_path: # new video 129 | vid_path = save_path 130 | if isinstance(vid_writer, cv2.VideoWriter): 131 | vid_writer.release() # release previous video writer 132 | 133 | fourcc = 'mp4v' # output video codec 134 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 135 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 136 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 137 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) 138 | vid_writer.write(im0) 139 | 140 | if save_txt or save_img: 141 | s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' 142 | print(f"Results saved to {save_dir}{s}") 143 | 144 | print(f'Done. ({time.time() - t0:.3f}s)') 145 | 146 | 147 | if __name__ == '__main__': 148 | parser = argparse.ArgumentParser() 149 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 150 | parser.add_argument('--source', type=str, default='data/images', help='source') # file/folder, 0 for webcam 151 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 152 | parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold') 153 | parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS') 154 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 155 | parser.add_argument('--view-img', action='store_true', help='display results') 156 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 157 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 158 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3') 159 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 160 | parser.add_argument('--augment', action='store_true', help='augmented inference') 161 | parser.add_argument('--update', action='store_true', help='update all models') 162 | parser.add_argument('--project', default='runs/detect', help='save results to project/name') 163 | parser.add_argument('--name', default='exp', help='save results to project/name') 164 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 165 | opt = parser.parse_args() 166 | print(opt) 167 | 168 | with torch.no_grad(): 169 | if opt.update: # update all models (to fix SourceChangeWarning) 170 | for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 171 | detect() 172 | strip_optimizer(opt.weights) 173 | else: 174 | detect() 175 | -------------------------------------------------------------------------------- /yolov5/hubconf.py: -------------------------------------------------------------------------------- 1 | """File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/ 2 | 3 | Usage: 4 | import torch 5 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80) 6 | """ 7 | 8 | from pathlib import Path 9 | 10 | import torch 11 | 12 | from models.yolo import Model 13 | from utils.general import set_logging 14 | from utils.google_utils import attempt_download 15 | 16 | dependencies = ['torch', 'yaml'] 17 | set_logging() 18 | 19 | 20 | def create(name, pretrained, channels, classes, autoshape): 21 | """Creates a specified YOLOv5 model 22 | 23 | Arguments: 24 | name (str): name of model, i.e. 'yolov5s' 25 | pretrained (bool): load pretrained weights into the model 26 | channels (int): number of input channels 27 | classes (int): number of model classes 28 | 29 | Returns: 30 | pytorch model 31 | """ 32 | config = Path(__file__).parent / 'models' / f'{name}.yaml' # model.yaml path 33 | try: 34 | model = Model(config, channels, classes) 35 | if pretrained: 36 | fname = f'{name}.pt' # checkpoint filename 37 | attempt_download(fname) # download if not found locally 38 | ckpt = torch.load(fname, map_location=torch.device('cpu')) # load 39 | state_dict = ckpt['model'].float().state_dict() # to FP32 40 | state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter 41 | model.load_state_dict(state_dict, strict=False) # load 42 | if len(ckpt['model'].names) == classes: 43 | model.names = ckpt['model'].names # set class names attribute 44 | if autoshape: 45 | model = model.autoshape() # for file/URI/PIL/cv2/np inputs and NMS 46 | return model 47 | 48 | except Exception as e: 49 | help_url = 'https://github.com/ultralytics/yolov5/issues/36' 50 | s = 'Cache maybe be out of date, try force_reload=True. See %s for help.' 
% help_url 51 | raise Exception(s) from e 52 | 53 | 54 | def yolov5s(pretrained=False, channels=3, classes=80, autoshape=True): 55 | """YOLOv5-small model from https://github.com/ultralytics/yolov5 56 | 57 | Arguments: 58 | pretrained (bool): load pretrained weights into the model, default=False 59 | channels (int): number of input channels, default=3 60 | classes (int): number of model classes, default=80 61 | 62 | Returns: 63 | pytorch model 64 | """ 65 | return create('yolov5s', pretrained, channels, classes, autoshape) 66 | 67 | 68 | def yolov5m(pretrained=False, channels=3, classes=80, autoshape=True): 69 | """YOLOv5-medium model from https://github.com/ultralytics/yolov5 70 | 71 | Arguments: 72 | pretrained (bool): load pretrained weights into the model, default=False 73 | channels (int): number of input channels, default=3 74 | classes (int): number of model classes, default=80 75 | 76 | Returns: 77 | pytorch model 78 | """ 79 | return create('yolov5m', pretrained, channels, classes, autoshape) 80 | 81 | 82 | def yolov5l(pretrained=False, channels=3, classes=80, autoshape=True): 83 | """YOLOv5-large model from https://github.com/ultralytics/yolov5 84 | 85 | Arguments: 86 | pretrained (bool): load pretrained weights into the model, default=False 87 | channels (int): number of input channels, default=3 88 | classes (int): number of model classes, default=80 89 | 90 | Returns: 91 | pytorch model 92 | """ 93 | return create('yolov5l', pretrained, channels, classes, autoshape) 94 | 95 | 96 | def yolov5x(pretrained=False, channels=3, classes=80, autoshape=True): 97 | """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5 98 | 99 | Arguments: 100 | pretrained (bool): load pretrained weights into the model, default=False 101 | channels (int): number of input channels, default=3 102 | classes (int): number of model classes, default=80 103 | 104 | Returns: 105 | pytorch model 106 | """ 107 | return create('yolov5x', pretrained, channels, classes, autoshape) 108 | 109 | 110 | def custom(path_or_model='path/to/model.pt', autoshape=True): 111 | """YOLOv5-custom model from https://github.com/ultralytics/yolov5 112 | 113 | Arguments (3 options): 114 | path_or_model (str): 'path/to/model.pt' 115 | path_or_model (dict): torch.load('path/to/model.pt') 116 | path_or_model (nn.Module): torch.load('path/to/model.pt')['model'] 117 | 118 | Returns: 119 | pytorch model 120 | """ 121 | model = torch.load(path_or_model) if isinstance(path_or_model, str) else path_or_model # load checkpoint 122 | if isinstance(model, dict): 123 | model = model['model'] # load model 124 | 125 | hub_model = Model(model.yaml).to(next(model.parameters()).device) # create 126 | hub_model.load_state_dict(model.float().state_dict()) # load state_dict 127 | hub_model.names = model.names # class names 128 | return hub_model.autoshape() if autoshape else hub_model 129 | 130 | 131 | if __name__ == '__main__': 132 | model = create(name='yolov5s', pretrained=True, channels=3, classes=80, autoshape=True) # pretrained example 133 | # model = custom(path_or_model='path/to/model.pt') # custom example 134 | 135 | # Verify inference 136 | from PIL import Image 137 | 138 | imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')] 139 | results = model(imgs) 140 | results.show() 141 | results.print() 142 | -------------------------------------------------------------------------------- /yolov5/models/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/yolov5/models/__init__.py -------------------------------------------------------------------------------- /yolov5/models/common.py: -------------------------------------------------------------------------------- 1 | # This file contains modules common to various models 2 | 3 | import math 4 | import numpy as np 5 | import requests 6 | import torch 7 | import torch.nn as nn 8 | from PIL import Image, ImageDraw 9 | 10 | from utils.datasets import letterbox 11 | from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh 12 | from utils.plots import color_list 13 | 14 | 15 | def autopad(k, p=None): # kernel, padding 16 | # Pad to 'same' 17 | if p is None: 18 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 19 | return p 20 | 21 | 22 | def DWConv(c1, c2, k=1, s=1, act=True): 23 | # Depthwise convolution 24 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 25 | 26 | 27 | class Conv(nn.Module): 28 | # Standard convolution 29 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 30 | super(Conv, self).__init__() 31 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 32 | self.bn = nn.BatchNorm2d(c2) 33 | self.act = nn.Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 34 | 35 | def forward(self, x): 36 | return self.act(self.bn(self.conv(x))) 37 | 38 | def fuseforward(self, x): 39 | return self.act(self.conv(x)) 40 | 41 | 42 | class Bottleneck(nn.Module): 43 | # Standard bottleneck 44 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 45 | super(Bottleneck, self).__init__() 46 | c_ = int(c2 * e) # hidden channels 47 | self.cv1 = Conv(c1, c_, 1, 1) 48 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 49 | self.add = shortcut and c1 == c2 50 | 51 | def forward(self, x): 52 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 53 | 54 | 55 | class BottleneckCSP(nn.Module): 56 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 57 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 58 | super(BottleneckCSP, self).__init__() 59 | c_ = int(c2 * e) # hidden channels 60 | self.cv1 = Conv(c1, c_, 1, 1) 61 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 62 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 63 | self.cv4 = Conv(2 * c_, c2, 1, 1) 64 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 65 | self.act = nn.LeakyReLU(0.1, inplace=True) 66 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 67 | 68 | def forward(self, x): 69 | y1 = self.cv3(self.m(self.cv1(x))) 70 | y2 = self.cv2(x) 71 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 72 | 73 | 74 | class C3(nn.Module): 75 | # CSP Bottleneck with 3 convolutions 76 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 77 | super(C3, self).__init__() 78 | c_ = int(c2 * e) # hidden channels 79 | self.cv1 = Conv(c1, c_, 1, 1) 80 | self.cv2 = Conv(c1, c_, 1, 1) 81 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 82 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 83 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 84 | 85 | def 
forward(self, x): 86 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 87 | 88 | 89 | class SPP(nn.Module): 90 | # Spatial pyramid pooling layer used in YOLOv3-SPP 91 | def __init__(self, c1, c2, k=(5, 9, 13)): 92 | super(SPP, self).__init__() 93 | c_ = c1 // 2 # hidden channels 94 | self.cv1 = Conv(c1, c_, 1, 1) 95 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 96 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 97 | 98 | def forward(self, x): 99 | x = self.cv1(x) 100 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 101 | 102 | 103 | class Focus(nn.Module): 104 | # Focus wh information into c-space 105 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 106 | super(Focus, self).__init__() 107 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 108 | 109 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 110 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 111 | 112 | 113 | class Concat(nn.Module): 114 | # Concatenate a list of tensors along dimension 115 | def __init__(self, dimension=1): 116 | super(Concat, self).__init__() 117 | self.d = dimension 118 | 119 | def forward(self, x): 120 | return torch.cat(x, self.d) 121 | 122 | 123 | class NMS(nn.Module): 124 | # Non-Maximum Suppression (NMS) module 125 | conf = 0.25 # confidence threshold 126 | iou = 0.45 # IoU threshold 127 | classes = None # (optional list) filter by class 128 | 129 | def __init__(self): 130 | super(NMS, self).__init__() 131 | 132 | def forward(self, x): 133 | return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) 134 | 135 | 136 | class autoShape(nn.Module): 137 | # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS 138 | img_size = 640 # inference size (pixels) 139 | conf = 0.25 # NMS confidence threshold 140 | iou = 0.45 # NMS IoU threshold 141 | classes = None # (optional list) filter by class 142 | 143 | def __init__(self, model): 144 | super(autoShape, self).__init__() 145 | self.model = model.eval() 146 | 147 | def autoshape(self): 148 | print('autoShape already enabled, skipping... ') # model already converted to model.autoshape() 149 | return self 150 | 151 | def forward(self, imgs, size=640, augment=False, profile=False): 152 | # Inference from various sources. For height=720, width=1280, RGB images example inputs are: 153 | # filename: imgs = 'data/samples/zidane.jpg' 154 | # URI: = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/zidane.jpg' 155 | # OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3) 156 | # PIL: = Image.open('image.jpg') # HWC x(720,1280,3) 157 | # numpy: = np.zeros((720,1280,3)) # HWC 158 | # torch: = torch.zeros(16,3,720,1280) # BCHW 159 | # multiple: = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] 
# list of images 160 | 161 | p = next(self.model.parameters()) # for device and type 162 | if isinstance(imgs, torch.Tensor): # torch 163 | return self.model(imgs.to(p.device).type_as(p), augment, profile) # inference 164 | 165 | # Pre-process 166 | n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images 167 | shape0, shape1 = [], [] # image and inference shapes 168 | for i, im in enumerate(imgs): 169 | if isinstance(im, str): # filename or uri 170 | im = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im) # open 171 | im = np.array(im) # to numpy 172 | if im.shape[0] < 5: # image in CHW 173 | im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1) 174 | im = im[:, :, :3] if im.ndim == 3 else np.tile(im[:, :, None], 3) # enforce 3ch input 175 | s = im.shape[:2] # HWC 176 | shape0.append(s) # image shape 177 | g = (size / max(s)) # gain 178 | shape1.append([y * g for y in s]) 179 | imgs[i] = im # update 180 | shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape 181 | x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs] # pad 182 | x = np.stack(x, 0) if n > 1 else x[0][None] # stack 183 | x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW 184 | x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32 185 | 186 | # Inference 187 | with torch.no_grad(): 188 | y = self.model(x, augment, profile)[0] # forward 189 | y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS 190 | 191 | # Post-process 192 | for i in range(n): 193 | scale_coords(shape1, y[i][:, :4], shape0[i]) 194 | 195 | return Detections(imgs, y, self.names) 196 | 197 | 198 | class Detections: 199 | # detections class for YOLOv5 inference results 200 | def __init__(self, imgs, pred, names=None): 201 | super(Detections, self).__init__() 202 | d = pred[0].device # device 203 | gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations 204 | self.imgs = imgs # list of images as numpy arrays 205 | self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls) 206 | self.names = names # class names 207 | self.xyxy = pred # xyxy pixels 208 | self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels 209 | self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized 210 | self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized 211 | self.n = len(self.pred) 212 | 213 | def display(self, pprint=False, show=False, save=False): 214 | colors = color_list() 215 | for i, (img, pred) in enumerate(zip(self.imgs, self.pred)): 216 | str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} ' 217 | if pred is not None: 218 | for c in pred[:, -1].unique(): 219 | n = (pred[:, -1] == c).sum() # detections per class 220 | str += f'{n} {self.names[int(c)]}s, ' # add to string 221 | if show or save: 222 | img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np 223 | for *box, conf, cls in pred: # xyxy, confidence, class 224 | # str += '%s %.2f, ' % (names[int(cls)], conf) # label 225 | ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot 226 | if save: 227 | f = f'results{i}.jpg' 228 | str += f"saved to '{f}'" 229 | img.save(f) # save 230 | if show: 231 | img.show(f'Image {i}') # show 232 | if pprint: 233 | print(str) 234 | 235 | def print(self): 236 | self.display(pprint=True) # 
print results 237 | 238 | def show(self): 239 | self.display(show=True) # show results 240 | 241 | def save(self): 242 | self.display(save=True) # save results 243 | 244 | def __len__(self): 245 | return self.n 246 | 247 | def tolist(self): 248 | # return a list of Detections objects, i.e. 'for result in results.tolist():' 249 | x = [Detections([self.imgs[i]], [self.pred[i]], self.names) for i in range(self.n)] 250 | for d in x: 251 | for k in ['imgs', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']: 252 | setattr(d, k, getattr(d, k)[0]) # pop out of list 253 | return x 254 | 255 | 256 | class Flatten(nn.Module): 257 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions 258 | @staticmethod 259 | def forward(x): 260 | return x.view(x.size(0), -1) 261 | 262 | 263 | class Classify(nn.Module): 264 | # Classification head, i.e. x(b,c1,20,20) to x(b,c2) 265 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups 266 | super(Classify, self).__init__() 267 | self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1) 268 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1) 269 | self.flat = Flatten() 270 | 271 | def forward(self, x): 272 | z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list 273 | return self.flat(self.conv(z)) # flatten to x(b,c2) 274 | -------------------------------------------------------------------------------- /yolov5/models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck 
https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.cat(y, 1) # nms ensemble 109 | y = torch.stack(y).mean(0) # mean ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /yolov5/models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental 
import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 98 | -------------------------------------------------------------------------------- /yolov5/models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /yolov5/models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /yolov5/models/hub/yolov3.yaml: 
-------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /yolov5/models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /yolov5/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | 
depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5/models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import math 4 | import sys 5 | from copy import deepcopy 6 | from pathlib import Path 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | logger = logging.getLogger(__name__) 13 | 14 | from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, Concat, NMS, autoShape 15 | from models.experimental import MixConv2d, CrossConv 16 | from utils.autoanchor import check_anchor_order 17 | from utils.general import make_divisible, check_file, set_logging 18 | from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 19 | select_device, copy_attr 20 | 21 | try: 22 | import thop # for FLOPS computation 23 | except ImportError: 24 | thop = None 25 | 26 | 27 | class Detect(nn.Module): 28 | stride = None # strides computed during build 29 | export = False # onnx export 30 | 31 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 32 | super(Detect, self).__init__() 33 | self.nc = nc # number of classes 34 | self.no = nc + 5 # number of outputs per anchor 35 | self.nl = len(anchors) # number of detection layers 36 | self.na = len(anchors[0]) // 2 # number of anchors 37 | self.grid = [torch.zeros(1)] * self.nl # init grid 38 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 39 | self.register_buffer('anchors', a) # shape(nl,na,2) 40 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 41 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 42 | 43 | def forward(self, x): 44 | # x = x.copy() # for profiling 45 | z = [] # inference output 46 | self.training |= self.export 47 | for i in range(self.nl): 48 | x[i] = 
self.m[i](x[i]) # conv 49 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 50 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 51 | 52 | if not self.training: # inference 53 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 54 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 55 | 56 | y = x[i].sigmoid() 57 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy 58 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 59 | z.append(y.view(bs, -1, self.no)) 60 | 61 | return x if self.training else (torch.cat(z, 1), x) 62 | 63 | @staticmethod 64 | def _make_grid(nx=20, ny=20): 65 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 66 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 67 | 68 | 69 | class Model(nn.Module): 70 | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None): # model, input channels, number of classes 71 | super(Model, self).__init__() 72 | if isinstance(cfg, dict): 73 | self.yaml = cfg # model dict 74 | else: # is *.yaml 75 | import yaml # for torch hub 76 | self.yaml_file = Path(cfg).name 77 | with open(cfg) as f: 78 | self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict 79 | 80 | # Define model 81 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 82 | if nc and nc != self.yaml['nc']: 83 | logger.info('Overriding model.yaml nc=%g with nc=%g' % (self.yaml['nc'], nc)) 84 | self.yaml['nc'] = nc # override yaml value 85 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 86 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 87 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 88 | 89 | # Build strides, anchors 90 | m = self.model[-1] # Detect() 91 | if isinstance(m, Detect): 92 | s = 128 # 2x min stride 93 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 94 | m.anchors /= m.stride.view(-1, 1, 1) 95 | check_anchor_order(m) 96 | self.stride = m.stride 97 | self._initialize_biases() # only run once 98 | # print('Strides: %s' % m.stride.tolist()) 99 | 100 | # Init weights, biases 101 | initialize_weights(self) 102 | self.info() 103 | logger.info('') 104 | 105 | def forward(self, x, augment=False, profile=False): 106 | if augment: 107 | img_size = x.shape[-2:] # height, width 108 | s = [1, 0.83, 0.67] # scales 109 | f = [None, 3, None] # flips (2-ud, 3-lr) 110 | y = [] # outputs 111 | for si, fi in zip(s, f): 112 | xi = scale_img(x.flip(fi) if fi else x, si) 113 | yi = self.forward_once(xi)[0] # forward 114 | # cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 115 | yi[..., :4] /= si # de-scale 116 | if fi == 2: 117 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 118 | elif fi == 3: 119 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 120 | y.append(yi) 121 | return torch.cat(y, 1), None # augmented inference, train 122 | else: 123 | return self.forward_once(x, profile) # single-scale inference, train 124 | 125 | def forward_once(self, x, profile=False): 126 | y, dt = [], [] # outputs 127 | for m in self.model: 128 | if m.f != -1: # if not from previous layer 129 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 130 | 131 | if profile: 132 | o = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 133 | t = time_synchronized() 134 | for _ in range(10): 135 | _ = 
m(x) 136 | dt.append((time_synchronized() - t) * 100) 137 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 138 | 139 | x = m(x) # run 140 | y.append(x if m.i in self.save else None) # save output 141 | 142 | if profile: 143 | print('%.1fms total' % sum(dt)) 144 | return x 145 | 146 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 147 | # https://arxiv.org/abs/1708.02002 section 3.3 148 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 149 | m = self.model[-1] # Detect() module 150 | for mi, s in zip(m.m, m.stride): # from 151 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 152 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 153 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 154 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 155 | 156 | def _print_biases(self): 157 | m = self.model[-1] # Detect() module 158 | for mi in m.m: # from 159 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 160 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 161 | 162 | # def _print_weights(self): 163 | # for m in self.model.modules(): 164 | # if type(m) is Bottleneck: 165 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 166 | 167 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 168 | print('Fusing layers... ') 169 | for m in self.model.modules(): 170 | if type(m) is Conv and hasattr(m, 'bn'): 171 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 172 | delattr(m, 'bn') # remove batchnorm 173 | m.forward = m.fuseforward # update forward 174 | self.info() 175 | return self 176 | 177 | def nms(self, mode=True): # add or remove NMS module 178 | present = type(self.model[-1]) is NMS # last layer is NMS 179 | if mode and not present: 180 | print('Adding NMS... ') 181 | m = NMS() # module 182 | m.f = -1 # from 183 | m.i = self.model[-1].i + 1 # index 184 | self.model.add_module(name='%s' % m.i, module=m) # add 185 | self.eval() 186 | elif not mode and present: 187 | print('Removing NMS... ') 188 | self.model = self.model[:-1] # remove 189 | return self 190 | 191 | def autoshape(self): # add autoShape module 192 | print('Adding autoShape... 
') 193 | m = autoShape(self) # wrap model 194 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 195 | return m 196 | 197 | def info(self, verbose=False, img_size=640): # print model information 198 | model_info(self, verbose, img_size) 199 | 200 | 201 | def parse_model(d, ch): # model_dict, input_channels(3) 202 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 203 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 204 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 205 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 206 | 207 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 208 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 209 | m = eval(m) if isinstance(m, str) else m # eval strings 210 | for j, a in enumerate(args): 211 | try: 212 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 213 | except: 214 | pass 215 | 216 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 217 | if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]: 218 | c1, c2 = ch[f], args[0] 219 | 220 | # Normal 221 | # if i > 0 and args[0] != no: # channel expansion factor 222 | # ex = 1.75 # exponential (default 2.0) 223 | # e = math.log(c2 / ch[1]) / math.log(2) 224 | # c2 = int(ch[1] * ex ** e) 225 | # if m != Focus: 226 | 227 | c2 = make_divisible(c2 * gw, 8) if c2 != no else c2 228 | 229 | # Experimental 230 | # if i > 0 and args[0] != no: # channel expansion factor 231 | # ex = 1 + gw # exponential (default 2.0) 232 | # ch1 = 32 # ch[1] 233 | # e = math.log(c2 / ch1) / math.log(2) # level 1-n 234 | # c2 = int(ch1 * ex ** e) 235 | # if m != Focus: 236 | # c2 = make_divisible(c2, 8) if c2 != no else c2 237 | 238 | args = [c1, c2, *args[1:]] 239 | if m in [BottleneckCSP, C3]: 240 | args.insert(2, n) 241 | n = 1 242 | elif m is nn.BatchNorm2d: 243 | args = [ch[f]] 244 | elif m is Concat: 245 | c2 = sum([ch[-1 if x == -1 else x + 1] for x in f]) 246 | elif m is Detect: 247 | args.append([ch[x + 1] for x in f]) 248 | if isinstance(args[1], int): # number of anchors 249 | args[1] = [list(range(args[1] * 2))] * len(f) 250 | else: 251 | c2 = ch[f] 252 | 253 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 254 | t = str(m)[8:-2].replace('__main__.', '') # module type 255 | np = sum([x.numel() for x in m_.parameters()]) # number params 256 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 257 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 258 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 259 | layers.append(m_) 260 | ch.append(c2) 261 | return nn.Sequential(*layers), sorted(save) 262 | 263 | 264 | if __name__ == '__main__': 265 | parser = argparse.ArgumentParser() 266 | parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml') 267 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 268 | opt = parser.parse_args() 269 | opt.cfg = check_file(opt.cfg) # check file 270 | set_logging() 271 | device = select_device(opt.device) 272 | 273 | # Create model 274 | model = Model(opt.cfg).to(device) 275 | model.train() 276 | 277 | # Profile 278 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 279 | # y = model(img, profile=True) 280 | 281 | # Tensorboard 282 | # from torch.utils.tensorboard import SummaryWriter 283 | # tb_writer = SummaryWriter() 284 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 285 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 286 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard 287 | -------------------------------------------------------------------------------- /yolov5/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 
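# (editor's note, illustrative; not part of the upstream yolov5m.yaml) The bundled
# yolov5s/m/l/x.yaml variants share this exact backbone/head layout and differ only in
# the depth_multiple and width_multiple values above (s: 0.33/0.50, m: 0.67/0.75,
# l: 1.00/1.00, x: 1.33/1.25), which parse_model() in models/yolo.py uses to scale the
# number of repeated blocks and the layer channel widths.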
28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5/models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | 
[[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /yolov5/requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | 3 | # base ---------------------------------------- 4 | Cython 5 | matplotlib>=3.2.2 6 | numpy>=1.18.5 7 | opencv-python>=4.1.2 8 | Pillow 9 | PyYAML>=5.3 10 | scipy>=1.4.1 11 | tensorboard>=2.2 12 | torch>=1.7.0 13 | torchvision>=0.8.1 14 | tqdm>=4.41.0 15 | 16 | # logging ------------------------------------- 17 | # wandb 18 | 19 | # plotting ------------------------------------ 20 | seaborn>=0.11.0 21 | pandas 22 | 23 | # export -------------------------------------- 24 | # coremltools==4.0 25 | # onnx>=1.8.0 26 | # scikit-learn==0.19.2 # for coreml quantization 27 | 28 | # extras -------------------------------------- 29 | thop # FLOPS computation 30 | pycocotools>=2.0 # COCO mAP 31 | -------------------------------------------------------------------------------- /yolov5/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import os 4 | from pathlib import Path 5 | from threading import Thread 6 | 7 | import numpy as np 8 | import torch 9 | import yaml 10 | from tqdm import tqdm 11 | 12 | from models.experimental import attempt_load 13 | from utils.datasets import create_dataloader 14 | from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \ 15 | non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path 16 | from utils.loss import compute_loss 17 | from utils.metrics import ap_per_class, ConfusionMatrix 18 | from utils.plots import plot_images, output_to_target, plot_study_txt 19 | from utils.torch_utils import select_device, time_synchronized 20 | 21 | 22 | def test(data, 23 | weights=None, 24 | batch_size=32, 25 | imgsz=640, 26 | conf_thres=0.001, 27 | iou_thres=0.6, # for NMS 28 | save_json=False, 29 | single_cls=False, 30 | augment=False, 31 | verbose=False, 32 | model=None, 33 | dataloader=None, 34 | save_dir=Path(''), # for saving images 35 | save_txt=False, # for auto-labelling 36 | save_hybrid=False, # for hybrid auto-labelling 37 | save_conf=False, # save auto-label confidences 38 | plots=True, 39 | log_imgs=0): # number of logged images 40 | 41 | # Initialize/load model and set device 42 | training = model is not None 43 | if training: # called by train.py 44 | device = next(model.parameters()).device # get model device 45 | 46 | else: # called directly 47 | set_logging() 48 | device = select_device(opt.device, batch_size=batch_size) 49 | 50 | # Directories 51 | save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment 
run 52 | (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir 53 | 54 | # Load model 55 | model = attempt_load(weights, map_location=device) # load FP32 model 56 | imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size 57 | 58 | # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 59 | # if device.type != 'cpu' and torch.cuda.device_count() > 1: 60 | # model = nn.DataParallel(model) 61 | 62 | # Half 63 | half = device.type != 'cpu' # half precision only supported on CUDA 64 | if half: 65 | model.half() 66 | 67 | # Configure 68 | model.eval() 69 | is_coco = data.endswith('coco.yaml') # is COCO dataset 70 | with open(data) as f: 71 | data = yaml.load(f, Loader=yaml.FullLoader) # model dict 72 | check_dataset(data) # check 73 | nc = 1 if single_cls else int(data['nc']) # number of classes 74 | iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95 75 | niou = iouv.numel() 76 | 77 | # Logging 78 | log_imgs, wandb = min(log_imgs, 100), None # ceil 79 | try: 80 | import wandb # Weights & Biases 81 | except ImportError: 82 | log_imgs = 0 83 | 84 | # Dataloader 85 | if not training: 86 | img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img 87 | _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once 88 | path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images 89 | dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0] 90 | 91 | seen = 0 92 | confusion_matrix = ConfusionMatrix(nc=nc) 93 | names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} 94 | coco91class = coco80_to_coco91_class() 95 | s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95') 96 | p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
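# (editor's note, illustrative only; not part of the upstream test.py) The loop below
# accumulates one tuple (correct, conf, pred_cls, target_cls) per image, where `correct`
# is a boolean tensor of shape (num_predictions, 10) with one column per IoU threshold in
# iouv = linspace(0.5, 0.95, 10). After the loop the tuples are concatenated and passed to
# ap_per_class(), which integrates precision/recall curves into mAP@0.5 and mAP@0.5:0.95.
# A heavily simplified sketch of that bookkeeping (pred_boxes/target_boxes are placeholder
# names; the real code additionally matches classes and suppresses duplicate detections):
#     ious, ti = box_iou(pred_boxes, target_boxes).max(1)  # best-matching target per prediction
#     correct = ious[:, None] > iouv[None, :]              # (num_predictions, 10) boolean matrix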
97 | loss = torch.zeros(3, device=device) 98 | jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] 99 | for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): 100 | img = img.to(device, non_blocking=True) 101 | img = img.half() if half else img.float() # uint8 to fp16/32 102 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 103 | targets = targets.to(device) 104 | nb, _, height, width = img.shape # batch size, channels, height, width 105 | 106 | with torch.no_grad(): 107 | # Run model 108 | t = time_synchronized() 109 | inf_out, train_out = model(img, augment=augment) # inference and training outputs 110 | t0 += time_synchronized() - t 111 | 112 | # Compute loss 113 | if training: 114 | loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls 115 | 116 | # Run NMS 117 | targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels 118 | lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling 119 | t = time_synchronized() 120 | output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb) 121 | t1 += time_synchronized() - t 122 | 123 | # Statistics per image 124 | for si, pred in enumerate(output): 125 | labels = targets[targets[:, 0] == si, 1:] 126 | nl = len(labels) 127 | tcls = labels[:, 0].tolist() if nl else [] # target class 128 | path = Path(paths[si]) 129 | seen += 1 130 | 131 | if len(pred) == 0: 132 | if nl: 133 | stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) 134 | continue 135 | 136 | # Predictions 137 | predn = pred.clone() 138 | scale_coords(img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred 139 | 140 | # Append to text file 141 | if save_txt: 142 | gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh 143 | for *xyxy, conf, cls in predn.tolist(): 144 | xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh 145 | line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format 146 | with open(save_dir / 'labels' / (path.stem + '.txt'), 'a') as f: 147 | f.write(('%g ' * len(line)).rstrip() % line + '\n') 148 | 149 | # W&B logging 150 | if plots and len(wandb_images) < log_imgs: 151 | box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, 152 | "class_id": int(cls), 153 | "box_caption": "%s %.3f" % (names[cls], conf), 154 | "scores": {"class_score": conf}, 155 | "domain": "pixel"} for *xyxy, conf, cls in pred.tolist()] 156 | boxes = {"predictions": {"box_data": box_data, "class_labels": names}} # inference-space 157 | wandb_images.append(wandb.Image(img[si], boxes=boxes, caption=path.name)) 158 | 159 | # Append to pycocotools JSON dictionary 160 | if save_json: 161 | # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
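# (editor's note, not part of the upstream test.py) COCO's "bbox" field is
# [x_top_left, y_top_left, width, height] in pixels, so the code below first converts
# the xyxy predictions to centre-based xywh with xyxy2xywh() and then subtracts half
# the width/height to shift the centre to the top-left corner before writing the JSON.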
162 | image_id = int(path.stem) if path.stem.isnumeric() else path.stem 163 | box = xyxy2xywh(predn[:, :4]) # xywh 164 | box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner 165 | for p, b in zip(pred.tolist(), box.tolist()): 166 | jdict.append({'image_id': image_id, 167 | 'category_id': coco91class[int(p[5])] if is_coco else int(p[5]), 168 | 'bbox': [round(x, 3) for x in b], 169 | 'score': round(p[4], 5)}) 170 | 171 | # Assign all predictions as incorrect 172 | correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) 173 | if nl: 174 | detected = [] # target indices 175 | tcls_tensor = labels[:, 0] 176 | 177 | # target boxes 178 | tbox = xywh2xyxy(labels[:, 1:5]) 179 | scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels 180 | if plots: 181 | confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1)) 182 | 183 | # Per target class 184 | for cls in torch.unique(tcls_tensor): 185 | ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # prediction indices 186 | pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # target indices 187 | 188 | # Search for detections 189 | if pi.shape[0]: 190 | # Prediction to target ious 191 | ious, i = box_iou(predn[pi, :4], tbox[ti]).max(1) # best ious, indices 192 | 193 | # Append detections 194 | detected_set = set() 195 | for j in (ious > iouv[0]).nonzero(as_tuple=False): 196 | d = ti[i[j]] # detected target 197 | if d.item() not in detected_set: 198 | detected_set.add(d.item()) 199 | detected.append(d) 200 | correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn 201 | if len(detected) == nl: # all targets already located in image 202 | break 203 | 204 | # Append statistics (correct, conf, pcls, tcls) 205 | stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) 206 | 207 | # Plot images 208 | if plots and batch_i < 3: 209 | f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels 210 | Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() 211 | f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions 212 | Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start() 213 | 214 | # Compute statistics 215 | stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy 216 | if len(stats) and stats[0].any(): 217 | p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names) 218 | p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95] 219 | mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() 220 | nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class 221 | else: 222 | nt = torch.zeros(1) 223 | 224 | # Print results 225 | pf = '%20s' + '%12.3g' * 6 # print format 226 | print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) 227 | 228 | # Print results per class 229 | if verbose and nc > 1 and len(stats): 230 | for i, c in enumerate(ap_class): 231 | print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) 232 | 233 | # Print speeds 234 | t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple 235 | if not training: 236 | print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) 237 | 238 | # Plots 239 | if plots: 240 | confusion_matrix.plot(save_dir=save_dir, names=list(names.values())) 241 | if wandb and wandb.run: 242 | wandb.log({"Images": wandb_images}) 243 | wandb.log({"Validation": 
[wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]}) 244 | 245 | # Save JSON 246 | if save_json and len(jdict): 247 | w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights 248 | anno_json = '../coco/annotations/instances_val2017.json' # annotations json 249 | pred_json = str(save_dir / f"{w}_predictions.json") # predictions json 250 | print('\nEvaluating pycocotools mAP... saving %s...' % pred_json) 251 | with open(pred_json, 'w') as f: 252 | json.dump(jdict, f) 253 | 254 | try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb 255 | from pycocotools.coco import COCO 256 | from pycocotools.cocoeval import COCOeval 257 | 258 | anno = COCO(anno_json) # init annotations api 259 | pred = anno.loadRes(pred_json) # init predictions api 260 | eval = COCOeval(anno, pred, 'bbox') 261 | if is_coco: 262 | eval.params.imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] # image IDs to evaluate 263 | eval.evaluate() 264 | eval.accumulate() 265 | eval.summarize() 266 | map, map50 = eval.stats[:2] # update results (mAP@0.5:0.95, mAP@0.5) 267 | except Exception as e: 268 | print(f'pycocotools unable to run: {e}') 269 | 270 | # Return results 271 | if not training: 272 | s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' 273 | print(f"Results saved to {save_dir}{s}") 274 | model.float() # for training 275 | maps = np.zeros(nc) + map 276 | for i, c in enumerate(ap_class): 277 | maps[c] = ap[i] 278 | return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t 279 | 280 | 281 | if __name__ == '__main__': 282 | parser = argparse.ArgumentParser(prog='test.py') 283 | parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)') 284 | parser.add_argument('--data', type=str, default='data/coco128.yaml', help='*.data path') 285 | parser.add_argument('--batch-size', type=int, default=32, help='size of each image batch') 286 | parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)') 287 | parser.add_argument('--conf-thres', type=float, default=0.001, help='object confidence threshold') 288 | parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') 289 | parser.add_argument('--task', default='val', help="'val', 'test', 'study'") 290 | parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') 291 | parser.add_argument('--single-cls', action='store_true', help='treat as single-class dataset') 292 | parser.add_argument('--augment', action='store_true', help='augmented inference') 293 | parser.add_argument('--verbose', action='store_true', help='report mAP by class') 294 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 295 | parser.add_argument('--save-hybrid', action='store_true', help='save label+prediction hybrid results to *.txt') 296 | parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels') 297 | parser.add_argument('--save-json', action='store_true', help='save a cocoapi-compatible JSON results file') 298 | parser.add_argument('--project', default='runs/test', help='save to project/name') 299 | parser.add_argument('--name', default='exp', help='save to project/name') 300 | parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') 301 | opt = parser.parse_args() 302 | opt.save_json |= opt.data.endswith('coco.yaml') 303 | opt.data = check_file(opt.data) # check file 304 | print(opt) 305 | 306 | if opt.task in ['val', 'test']: # run normally 307 | test(opt.data, 308 | opt.weights, 309 | opt.batch_size, 310 | opt.img_size, 311 | opt.conf_thres, 312 | opt.iou_thres, 313 | opt.save_json, 314 | opt.single_cls, 315 | opt.augment, 316 | opt.verbose, 317 | save_txt=opt.save_txt | opt.save_hybrid, 318 | save_hybrid=opt.save_hybrid, 319 | save_conf=opt.save_conf, 320 | ) 321 | 322 | elif opt.task == 'study': # run over a range of settings and save/plot 323 | for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']: 324 | f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to 325 | x = list(range(320, 800, 64)) # x axis 326 | y = [] # y axis 327 | for i in x: # img-size 328 | print('\nRunning %s point %s...' % (f, i)) 329 | r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json, 330 | plots=False) 331 | y.append(r + t) # results and times 332 | np.savetxt(f, y, fmt='%10.4g') # save 333 | os.system('zip -r study.zip study_*.txt') 334 | plot_study_txt(f, x) # plot 335 | -------------------------------------------------------------------------------- /yolov5/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tensorturtle/classy-sort-yolov5/adc650f966b172fd925aff6650e31a03f09fc4c6/yolov5/utils/__init__.py -------------------------------------------------------------------------------- /yolov5/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. 
# for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /yolov5/utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | 10 | def check_anchor_order(m): 11 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 12 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 13 | da = a[-1] - a[0] # delta a 14 | ds = m.stride[-1] - m.stride[0] # delta s 15 | if da.sign() != ds.sign(): # same order 16 | print('Reversing anchor order') 17 | m.anchors[:] = m.anchors.flip(0) 18 | m.anchor_grid[:] = m.anchor_grid.flip(0) 19 | 20 | 21 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 22 | # Check anchor fit to data, recompute if necessary 23 | print('\nAnalyzing anchors... ', end='') 24 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 25 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 26 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 27 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 28 | 29 | def metric(k): # compute metric 30 | r = wh[:, None] / k[None] 31 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 32 | best = x.max(1)[0] # best_x 33 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 34 | bpr = (best > 1. / thr).float().mean() # best possible recall 35 | return bpr, aat 36 | 37 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 38 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') 39 | if bpr < 0.98: # threshold to recompute 40 | print('. 
Attempting to improve anchors, please wait...') 41 | na = m.anchor_grid.numel() // 2 # number of anchors 42 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 43 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 44 | if new_bpr > bpr: # replace anchors 45 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 46 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 47 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 48 | check_anchor_order(m) 49 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') 50 | else: 51 | print('Original anchors better than new anchors. Proceeding with original anchors.') 52 | print('') # newline 53 | 54 | 55 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 56 | """ Creates kmeans-evolved anchors from training dataset 57 | 58 | Arguments: 59 | path: path to dataset *.yaml, or a loaded dataset 60 | n: number of anchors 61 | img_size: image size used for training 62 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 63 | gen: generations to evolve anchors using genetic algorithm 64 | verbose: print all results 65 | 66 | Return: 67 | k: kmeans evolved anchors 68 | 69 | Usage: 70 | from utils.autoanchor import *; _ = kmean_anchors() 71 | """ 72 | thr = 1. / thr 73 | 74 | def metric(k, wh): # compute metrics 75 | r = wh[:, None] / k[None] 76 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 77 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 78 | return x, x.max(1)[0] # x, best_x 79 | 80 | def anchor_fitness(k): # mutation fitness 81 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 82 | return (best * (best > thr).float()).mean() # fitness 83 | 84 | def print_results(k): 85 | k = k[np.argsort(k.prod(1))] # sort small to large 86 | x, best = metric(k, wh0) 87 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 88 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) 89 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % 90 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') 91 | for i, x in enumerate(k): 92 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 93 | return k 94 | 95 | if isinstance(path, str): # *.yaml file 96 | with open(path) as f: 97 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict 98 | from utils.datasets import LoadImagesAndLabels 99 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 100 | else: 101 | dataset = path # dataset 102 | 103 | # Get label wh 104 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 105 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 106 | 107 | # Filter 108 | i = (wh0 < 3.0).any(1).sum() 109 | if i: 110 | print('WARNING: Extremely small objects found. ' 111 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) 112 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 113 | 114 | # Kmeans calculation 115 | print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) 116 | s = wh.std(0) # sigmas for whitening 117 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 118 | k *= s 119 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 120 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 121 | k = print_results(k) 122 | 123 | # Plot 124 | # k, d = [None] * 20, [None] * 20 125 | # for i in tqdm(range(1, 21)): 126 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 127 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 128 | # ax = ax.ravel() 129 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 130 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 131 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 132 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 133 | # fig.savefig('wh.png', dpi=200) 134 | 135 | # Evolve 136 | npr = np.random 137 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 138 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar 139 | for _ in pbar: 140 | v = np.ones(sh) 141 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 142 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 143 | kg = (k.copy() * v).clip(min=2.0) 144 | fg = anchor_fitness(kg) 145 | if fg > f: 146 | f, k = fg, kg.copy() 147 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f 148 | if verbose: 149 | print_results(k) 150 | 151 | return print_results(k) 152 | -------------------------------------------------------------------------------- /yolov5/utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 
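# (editor's note, not part of the upstream Dockerfile) This image is built by App Engine
# itself through the app.yaml shown further below (runtime: custom, env: flex); a typical
# deployment, assuming an initialised gcloud SDK, would be `gcloud app deploy app.yaml`.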
25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /yolov5/utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==18.1 3 | Flask==1.0.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /yolov5/utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /yolov5/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import torch 10 | 11 | 12 | def gsutil_getsize(url=''): 13 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 14 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') 15 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 16 | 17 | 18 | def attempt_download(weights): 19 | # Attempt to download pretrained weights if not found locally 20 | weights = str(weights).strip().replace("'", '') 21 | file = Path(weights).name.lower() 22 | 23 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/' 24 | models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models 25 | redundant = False # offer second download option 26 | 27 | if file in models and not os.path.isfile(weights): 28 | # Google Drive 29 | # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', 30 | # 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', 31 | # 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', 32 | # 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'} 33 | # r = gdrive_download(id=d[file], name=weights) if file in d else 1 34 | # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check 35 | # return 36 | 37 | try: # GitHub 38 | url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file 39 | print('Downloading %s to %s...' % (url, weights)) 40 | torch.hub.download_url_to_file(url, weights) 41 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check 42 | except Exception as e: # GCP 43 | print('Download error: %s' % e) 44 | assert redundant, 'No secondary mirror' 45 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file 46 | print('Downloading %s to %s...' % (url, weights)) 47 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check 50 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 51 | print('ERROR: Download failure: %s' % msg) 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='1uH2BylpFxHKEGXKL6wJJlsgMU2YEjxuc', name='tmp.zip'): 57 | # Downloads a file from Google Drive. 
from utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | 60 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 61 | os.remove(name) if os.path.exists(name) else None # remove existing 62 | os.remove('cookie') if os.path.exists('cookie') else None 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 67 | if os.path.exists('cookie'): # large file 68 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 69 | else: # small file 70 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 71 | r = os.system(s) # execute, capture return 72 | os.remove('cookie') if os.path.exists('cookie') else None 73 | 74 | # Error check 75 | if r != 0: 76 | os.remove(name) if os.path.exists(name) else None # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if name.endswith('.zip'): 82 | print('unzipping... ', end='') 83 | os.system('unzip -q %s' % name) # unzip 84 | os.remove(name) # remove zip to free space 85 | 86 | print('Done (%.1fs)' % (time.time() - t)) 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /yolov5/utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 
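# (editor's note, not part of the upstream loss.py) Intuition: in forward() below,
# dx = pred - true approaches +1 exactly when the model is confident about an object but
# the label says background (a likely missing annotation); alpha_factor =
# 1 - exp((dx - 1) / (alpha + 1e-4)) then approaches 0, so such terms contribute almost
# nothing to the mean loss, while ordinary errors (dx <= 0) keep close to full BCE weight.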
17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | def compute_loss(p, targets, model): # predictions, targets, model 89 | device = targets.device 90 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 91 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets 92 | h = model.hyp # hyperparameters 93 | 94 | # Define criteria 95 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights) 96 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 97 | 98 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 99 | cp, cn = smooth_BCE(eps=0.0) 100 | 101 | # Focal loss 102 | g = h['fl_gamma'] # focal loss gamma 103 | if g > 0: 104 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 105 | 106 | # Losses 107 | nt = 0 # number of targets 108 | no = len(p) # number of outputs 109 | balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 110 | for i, pi in enumerate(p): # layer index, layer predictions 111 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 112 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 113 | 114 | n = b.shape[0] # number of targets 115 | if n: 116 | nt += n # cumulative targets 117 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 118 | 119 | # Regression 120 | pxy = ps[:, :2].sigmoid() * 2. - 0.5 121 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 122 | pbox = torch.cat((pxy, pwh), 1) # predicted box 123 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 124 | lbox += (1.0 - iou).mean() # iou loss 125 | 126 | # Objectness 127 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 128 | 129 | # Classification 130 | if model.nc > 1: # cls loss (only if multiple classes) 131 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets 132 | t[range(n), tcls[i]] = cp 133 | lcls += BCEcls(ps[:, 5:], t) # BCE 134 | 135 | # Append targets to text file 136 | # with open('targets.txt', 'a') as file: 137 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 138 | 139 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss 140 | 141 | s = 3 / no # output count scaling 142 | lbox *= h['box'] * s 143 | lobj *= h['obj'] * s * (1.4 if no == 4 else 1.) 
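        # The per-component gains come from the hyperparameter dict (h['box'],
        # h['obj'], h['cls']); s = 3 / no rescales them when the head has a
        # different number of output layers, and obj gets an extra 1.4x on
        # 4-output (P6) models. The summed loss below is multiplied by batch
        # size so the gradient magnitude matches per-image accumulation.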
144 | lcls *= h['cls'] * s 145 | bs = tobj.shape[0] # batch size 146 | 147 | loss = lbox + lobj + lcls 148 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 149 | 150 | 151 | def build_targets(p, targets, model): 152 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 153 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 154 | na, nt = det.na, targets.shape[0] # number of anchors, targets 155 | tcls, tbox, indices, anch = [], [], [], [] 156 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 157 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 158 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 159 | 160 | g = 0.5 # bias 161 | off = torch.tensor([[0, 0], 162 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 163 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 164 | ], device=targets.device).float() * g # offsets 165 | 166 | for i in range(det.nl): 167 | anchors = det.anchors[i] 168 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 169 | 170 | # Match targets to anchors 171 | t = targets * gain 172 | if nt: 173 | # Matches 174 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 175 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare 176 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 177 | t = t[j] # filter 178 | 179 | # Offsets 180 | gxy = t[:, 2:4] # grid xy 181 | gxi = gain[[2, 3]] - gxy # inverse 182 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 183 | l, m = ((gxi % 1. < g) & (gxi > 1.)).T 184 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 185 | t = t.repeat((5, 1, 1))[j] 186 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 187 | else: 188 | t = targets[0] 189 | offsets = 0 190 | 191 | # Define 192 | b, c = t[:, :2].long().T # image, class 193 | gxy = t[:, 2:4] # grid xy 194 | gwh = t[:, 4:6] # grid wh 195 | gij = (gxy - offsets).long() 196 | gi, gj = gij.T # grid xy indices 197 | 198 | # Append 199 | a = t[:, 6].long() # anchor indices 200 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 201 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 202 | anch.append(anchors[a]) # anchors 203 | tcls.append(c) # class 204 | 205 | return tcls, tbox, indices, anch 206 | -------------------------------------------------------------------------------- /yolov5/utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 
26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | 39 | # Create Precision-Recall curve and compute AP for each class 40 | px, py = np.linspace(0, 1, 1000), [] # for plotting 41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 44 | for ci, c in enumerate(unique_classes): 45 | i = pred_cls == c 46 | n_l = (target_cls == c).sum() # number of labels 47 | n_p = i.sum() # number of predictions 48 | 49 | if n_p == 0 or n_l == 0: 50 | continue 51 | else: 52 | # Accumulate FPs and TPs 53 | fpc = (1 - tp[i]).cumsum(0) 54 | tpc = tp[i].cumsum(0) 55 | 56 | # Recall 57 | recall = tpc / (n_l + 1e-16) # recall curve 58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 59 | 60 | # Precision 61 | precision = tpc / (tpc + fpc) # precision curve 62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 63 | 64 | # AP from recall-precision curve 65 | for j in range(tp.shape[1]): 66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 67 | if plot and (j == 0): 68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 69 | 70 | # Compute F1 score (harmonic mean of precision and recall) 71 | f1 = 2 * p * r / (p + r + 1e-16) 72 | 73 | if plot: 74 | plot_pr_curve(px, py, ap, save_dir, names) 75 | 76 | return p, r, ap, f1, unique_classes.astype('int32') 77 | 78 | 79 | def compute_ap(recall, precision): 80 | """ Compute the average precision, given the recall and precision curves 81 | # Arguments 82 | recall: The recall curve (list) 83 | precision: The precision curve (list) 84 | # Returns 85 | Average precision, precision curve, recall curve 86 | """ 87 | 88 | # Append sentinel values to beginning and end 89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 90 | mpre = np.concatenate(([1.], precision, [0.])) 91 | 92 | # Compute the precision envelope 93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 94 | 95 | # Integrate area under curve 96 | method = 'interp' # methods: 'continuous', 'interp' 97 | if method == 'interp': 98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 100 | else: # 'continuous' 101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 103 | 104 | return ap, mpre, mrec 105 | 106 | 107 | class ConfusionMatrix: 108 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 109 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 110 | self.matrix = np.zeros((nc + 1, nc + 1)) 111 | self.nc = nc # number of classes 112 | self.conf = conf 113 | self.iou_thres = iou_thres 114 | 115 | def process_batch(self, detections, labels): 116 | """ 117 | Return intersection-over-union (Jaccard index) of boxes. 118 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
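        Usage sketch (tensor shapes assumed for illustration only; class_names is a
        hypothetical list of label strings):
            cm = ConfusionMatrix(nc=80)
            cm.process_batch(detections, labels)  # detections: (N, 6), labels: (M, 5) torch tensors
            cm.plot(save_dir='.', names=class_names)
            cm.print()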
119 | Arguments: 120 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 121 | labels (Array[M, 5]), class, x1, y1, x2, y2 122 | Returns: 123 | None, updates confusion matrix accordingly 124 | """ 125 | detections = detections[detections[:, 4] > self.conf] 126 | gt_classes = labels[:, 0].int() 127 | detection_classes = detections[:, 5].int() 128 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 129 | 130 | x = torch.where(iou > self.iou_thres) 131 | if x[0].shape[0]: 132 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 133 | if x[0].shape[0] > 1: 134 | matches = matches[matches[:, 2].argsort()[::-1]] 135 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 138 | else: 139 | matches = np.zeros((0, 3)) 140 | 141 | n = matches.shape[0] > 0 142 | m0, m1, _ = matches.transpose().astype(np.int16) 143 | for i, gc in enumerate(gt_classes): 144 | j = m0 == i 145 | if n and sum(j) == 1: 146 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 147 | else: 148 | self.matrix[gc, self.nc] += 1 # background FP 149 | 150 | if n: 151 | for i, dc in enumerate(detection_classes): 152 | if not any(m1 == i): 153 | self.matrix[self.nc, dc] += 1 # background FN 154 | 155 | def matrix(self): 156 | return self.matrix 157 | 158 | def plot(self, save_dir='', names=()): 159 | try: 160 | import seaborn as sn 161 | 162 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 163 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 164 | 165 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 166 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 167 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 168 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 169 | xticklabels=names + ['background FN'] if labels else "auto", 170 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 171 | fig.axes[0].set_xlabel('True') 172 | fig.axes[0].set_ylabel('Predicted') 173 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 174 | except Exception as e: 175 | pass 176 | 177 | def print(self): 178 | for i in range(self.nc + 1): 179 | print(' '.join(map(str, self.matrix[i]))) 180 | 181 | 182 | # Plots ---------------------------------------------------------------------------------------------------------------- 183 | 184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()): 185 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 186 | py = np.stack(py, axis=1) 187 | 188 | if 0 < len(names) < 21: # show mAP in legend if < 10 classes 189 | for i, y in enumerate(py.T): 190 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision) 191 | else: 192 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 193 | 194 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 195 | ax.set_xlabel('Recall') 196 | ax.set_ylabel('Precision') 197 | ax.set_xlim(0, 1) 198 | ax.set_ylim(0, 1) 199 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 200 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250) 201 | -------------------------------------------------------------------------------- /yolov5/utils/torch_utils.py: 
-------------------------------------------------------------------------------- 1 | # PyTorch utils 2 | 3 | import logging 4 | import math 5 | import os 6 | import time 7 | from contextlib import contextmanager 8 | from copy import deepcopy 9 | 10 | import torch 11 | import torch.backends.cudnn as cudnn 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | import torchvision 15 | 16 | try: 17 | import thop # for FLOPS computation 18 | except ImportError: 19 | thop = None 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | @contextmanager 24 | def torch_distributed_zero_first(local_rank: int): 25 | """ 26 | Decorator to make all processes in distributed training wait for each local_master to do something. 27 | """ 28 | if local_rank not in [-1, 0]: 29 | torch.distributed.barrier() 30 | yield 31 | if local_rank == 0: 32 | torch.distributed.barrier() 33 | 34 | 35 | def init_torch_seeds(seed=0): 36 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 37 | torch.manual_seed(seed) 38 | if seed == 0: # slower, more reproducible 39 | cudnn.deterministic = True 40 | cudnn.benchmark = False 41 | else: # faster, less reproducible 42 | cudnn.deterministic = False 43 | cudnn.benchmark = True 44 | 45 | 46 | def select_device(device='', batch_size=None): 47 | # device = 'cpu' or '0' or '0,1,2,3' 48 | cpu_request = device.lower() == 'cpu' 49 | if device and not cpu_request: # if device requested other than 'cpu' 50 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 51 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availablity 52 | 53 | cuda = False if cpu_request else torch.cuda.is_available() 54 | if cuda: 55 | c = 1024 ** 2 # bytes to MB 56 | ng = torch.cuda.device_count() 57 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 58 | assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}' 59 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 60 | s = f'Using torch {torch.__version__} ' 61 | for i, d in enumerate((device or '0').split(',')): 62 | if i == 1: 63 | s = ' ' * len(s) 64 | logger.info(f"{s}CUDA:{d} ({x[i].name}, {x[i].total_memory / c}MB)") 65 | else: 66 | logger.info(f'Using torch {torch.__version__} CPU') 67 | 68 | logger.info('') # skip a line 69 | return torch.device('cuda:0' if cuda else 'cpu') 70 | 71 | 72 | def time_synchronized(): 73 | # pytorch-accurate time 74 | torch.cuda.synchronize() if torch.cuda.is_available() else None 75 | return time.time() 76 | 77 | 78 | def profile(x, ops, n=100, device=None): 79 | # profile a pytorch module or list of modules. Example usage: 80 | # x = torch.randn(16, 3, 640, 640) # input 81 | # m1 = lambda x: x * torch.sigmoid(x) 82 | # m2 = nn.SiLU() 83 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 84 | 85 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 86 | x = x.to(device) 87 | x.requires_grad = True 88 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 89 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 90 | for m in ops if isinstance(ops, list) else [ops]: 91 | m = m.to(device) if hasattr(m, 'to') else m # device 92 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 93 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 94 | try: 95 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 96 | except: 97 | flops = 0 98 | 99 | for _ in range(n): 100 | t[0] = time_synchronized() 101 | y = m(x) 102 | t[1] = time_synchronized() 103 | try: 104 | _ = y.sum().backward() 105 | t[2] = time_synchronized() 106 | except: # no backward method 107 | t[2] = float('nan') 108 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 109 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 110 | 111 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 112 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 113 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 114 | print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 115 | 116 | 117 | def is_parallel(model): 118 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 119 | 120 | 121 | def intersect_dicts(da, db, exclude=()): 122 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 123 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 124 | 125 | 126 | def initialize_weights(model): 127 | for m in model.modules(): 128 | t = type(m) 129 | if t is nn.Conv2d: 130 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 131 | elif t is nn.BatchNorm2d: 132 | m.eps = 1e-3 133 | m.momentum = 0.03 134 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 135 | m.inplace = True 136 | 137 | 138 | def find_modules(model, mclass=nn.Conv2d): 139 | # Finds layer indices matching module class 'mclass' 140 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 141 | 142 | 143 | def sparsity(model): 144 | # Return global model sparsity 145 | a, b = 0., 0. 146 | for p in model.parameters(): 147 | a += p.numel() 148 | b += (p == 0).sum() 149 | return b / a 150 | 151 | 152 | def prune(model, amount=0.3): 153 | # Prune model to requested global sparsity 154 | import torch.nn.utils.prune as prune 155 | print('Pruning model... 
', end='') 156 | for name, m in model.named_modules(): 157 | if isinstance(m, nn.Conv2d): 158 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 159 | prune.remove(m, 'weight') # make permanent 160 | print(' %.3g global sparsity' % sparsity(model)) 161 | 162 | 163 | def fuse_conv_and_bn(conv, bn): 164 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 165 | fusedconv = nn.Conv2d(conv.in_channels, 166 | conv.out_channels, 167 | kernel_size=conv.kernel_size, 168 | stride=conv.stride, 169 | padding=conv.padding, 170 | groups=conv.groups, 171 | bias=True).requires_grad_(False).to(conv.weight.device) 172 | 173 | # prepare filters 174 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 175 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 176 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 177 | 178 | # prepare spatial bias 179 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 180 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 181 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 182 | 183 | return fusedconv 184 | 185 | 186 | def model_info(model, verbose=False, img_size=640): 187 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 188 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 189 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 190 | if verbose: 191 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 192 | for i, (name, p) in enumerate(model.named_parameters()): 193 | name = name.replace('module_list.', '') 194 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 195 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 196 | 197 | try: # FLOPS 198 | from thop import profile 199 | stride = int(model.stride.max()) if hasattr(model, 'stride') else 32 200 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 201 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 202 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 203 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 204 | except (ImportError, Exception): 205 | fs = '' 206 | 207 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 208 | 209 | 210 | def load_classifier(name='resnet101', n=2): 211 | # Loads a pretrained model reshaped to n-class output 212 | model = torchvision.models.__dict__[name](pretrained=True) 213 | 214 | # ResNet model properties 215 | # input_size = [3, 224, 224] 216 | # input_space = 'RGB' 217 | # input_range = [0, 1] 218 | # mean = [0.485, 0.456, 0.406] 219 | # std = [0.229, 0.224, 0.225] 220 | 221 | # Reshape output to n classes 222 | filters = model.fc.weight.shape[1] 223 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 224 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 225 | model.fc.out_features = n 226 | return model 227 | 228 | 229 | def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio 230 | # scales img(bs,3,y,x) by ratio 231 | if ratio == 1.0: 232 | return img 
233 | else: 234 | h, w = img.shape[2:] 235 | s = (int(h * ratio), int(w * ratio)) # new size 236 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 237 | if not same_shape: # pad/crop img 238 | gs = 32 # (pixels) grid size 239 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 240 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 241 | 242 | 243 | def copy_attr(a, b, include=(), exclude=()): 244 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 245 | for k, v in b.__dict__.items(): 246 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 247 | continue 248 | else: 249 | setattr(a, k, v) 250 | 251 | 252 | class ModelEMA: 253 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 254 | Keep a moving average of everything in the model state_dict (parameters and buffers). 255 | This is intended to allow functionality like 256 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 257 | A smoothed version of the weights is necessary for some training schemes to perform well. 258 | This class is sensitive where it is initialized in the sequence of model init, 259 | GPU assignment and distributed training wrappers. 260 | """ 261 | 262 | def __init__(self, model, decay=0.9999, updates=0): 263 | # Create EMA 264 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 265 | # if next(model.parameters()).device.type != 'cpu': 266 | # self.ema.half() # FP16 EMA 267 | self.updates = updates # number of EMA updates 268 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 269 | for p in self.ema.parameters(): 270 | p.requires_grad_(False) 271 | 272 | def update(self, model): 273 | # Update EMA parameters 274 | with torch.no_grad(): 275 | self.updates += 1 276 | d = self.decay(self.updates) 277 | 278 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 279 | for k, v in self.ema.state_dict().items(): 280 | if v.dtype.is_floating_point: 281 | v *= d 282 | v += (1. - d) * msd[k].detach() 283 | 284 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 285 | # Update EMA attributes 286 | copy_attr(self.ema, model, include, exclude) 287 | -------------------------------------------------------------------------------- /yolov5/weights/download_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Download latest models from https://github.com/ultralytics/yolov5/releases 3 | # Usage: 4 | # $ bash weights/download_weights.sh 5 | 6 | python - <