├── .gitignore ├── LICENSE ├── README.md ├── clip_object_tracker.py ├── coco.names ├── cv.yml ├── data ├── coco.yaml ├── coco128.yaml ├── hyp.finetune.yaml ├── hyp.scratch.yaml ├── images │ ├── bus.jpg │ └── zidane.jpg ├── scripts │ ├── get_coco.sh │ └── get_voc.sh ├── video │ ├── cars.mp4 │ ├── fish.mp4 │ └── test.mp4 └── voc.yaml ├── deep_sort ├── __init__.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── track.py └── tracker.py ├── example └── video │ └── fish.mp4 ├── model_data └── mars-small128.pb ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── requirements.txt ├── tool ├── config.py ├── darknet2pytorch.py ├── region_loss.py ├── torch_utils.py ├── utils.py ├── utils_iou.py └── yolo_layer.py ├── tools ├── freeze_model.py ├── generate_clip_detections.py └── generate_detections.py └── utils ├── __init__.py ├── activations.py ├── autoanchor.py ├── datasets.py ├── general.py ├── google_app_engine ├── Dockerfile ├── additional_requirements.txt └── app.yaml ├── google_utils.py ├── loss.py ├── metrics.py ├── models ├── __init__.py ├── common.py ├── experimental.py ├── export.py ├── hub │ ├── yolov3-spp.yaml │ ├── yolov3-tiny.yaml │ ├── yolov3.yaml │ ├── yolov5-fpn.yaml │ └── yolov5-panet.yaml ├── yolo.py ├── yolov5l.yaml ├── yolov5m.yaml ├── yolov5s.yaml └── yolov5x.yaml ├── plots.py ├── roboflow.py ├── torch_utils.py ├── yolov4.py ├── yolov5.py └── yolov7.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.cfg 3 | !cfg/yolov3*.cfg 4 | 5 | storage.googleapis.com 6 | runs/* 7 | !data/images/zidane.jpg 8 | !data/images/bus.jpg 9 | !data/coco.names 10 | !data/coco_paper.names 11 | !data/coco.data 12 | !data/coco_*.data 13 | !data/coco_*.txt 14 | !data/trainvalno5k.shapes 15 | !data/*.sh 16 | 17 | pycocotools/* 18 | results*.txt 19 | gcp_test*.sh 20 | 21 | # Datasets ------------------------------------------------------------------------------------------------------------- 22 | coco/ 23 | coco128/ 24 | VOC/ 25 | 26 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 27 | *.m~ 28 | *.mat 29 | !targets*.mat 30 | 31 | # Neural Network weights ----------------------------------------------------------------------------------------------- 32 | *.weights 33 | *.pt 34 | *.onnx 35 | *.mlmodel 36 | *.torchscript 37 | darknet53.conv.74 38 | yolov3-tiny.conv.15 39 | 40 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 41 | # Byte-compiled / optimized / DLL files 42 | __pycache__/ 43 | *.py[cod] 44 | *$py.class 45 | 46 | # C extensions 47 | *.so 48 | 49 | # Distribution / packaging 50 | .Python 51 | env/ 52 | build/ 53 | develop-eggs/ 54 | dist/ 55 | downloads/ 56 | eggs/ 57 | .eggs/ 58 | lib/ 59 | lib64/ 60 | parts/ 61 | sdist/ 62 | var/ 63 | wheels/ 64 | *.egg-info/ 65 | wandb/ 66 | .installed.cfg 67 | *.egg 68 | 69 | 70 | # PyInstaller 71 | # Usually these files are written by a python script from a template 72 | # before PyInstaller builds the exe, so as to inject date/other infos into 
it. 73 | *.manifest 74 | *.spec 75 | 76 | # Installer logs 77 | pip-log.txt 78 | pip-delete-this-directory.txt 79 | 80 | # Unit test / coverage reports 81 | htmlcov/ 82 | .tox/ 83 | .coverage 84 | .coverage.* 85 | .cache 86 | nosetests.xml 87 | coverage.xml 88 | *.cover 89 | .hypothesis/ 90 | 91 | # Translations 92 | *.mo 93 | *.pot 94 | 95 | # Django stuff: 96 | *.log 97 | local_settings.py 98 | 99 | # Flask stuff: 100 | instance/ 101 | .webassets-cache 102 | 103 | # Scrapy stuff: 104 | .scrapy 105 | 106 | # Sphinx documentation 107 | docs/_build/ 108 | 109 | # PyBuilder 110 | target/ 111 | 112 | # Jupyter Notebook 113 | .ipynb_checkpoints 114 | 115 | # pyenv 116 | .python-version 117 | 118 | # celery beat schedule file 119 | celerybeat-schedule 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # dotenv 125 | .env 126 | 127 | # virtualenv 128 | .venv* 129 | venv*/ 130 | ENV*/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | 145 | 146 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 147 | 148 | # General 149 | .DS_Store 150 | .AppleDouble 151 | .LSOverride 152 | 153 | # Icon must end with two \r 154 | Icon 155 | Icon? 156 | 157 | # Thumbnails 158 | ._* 159 | 160 | # Files that might appear in the root of a volume 161 | .DocumentRevisions-V100 162 | .fseventsd 163 | .Spotlight-V100 164 | .TemporaryItems 165 | .Trashes 166 | .VolumeIcon.icns 167 | .com.apple.timemachine.donotpresent 168 | 169 | # Directories potentially created on remote AFP share 170 | .AppleDB 171 | .AppleDesktop 172 | Network Trash Folder 173 | Temporary Items 174 | .apdisk 175 | 176 | 177 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 178 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 179 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 180 | 181 | # User-specific stuff: 182 | .idea/* 183 | .idea/**/workspace.xml 184 | .idea/**/tasks.xml 185 | .idea/dictionaries 186 | .html # Bokeh Plots 187 | .pg # TensorFlow Frozen Graphs 188 | .avi # videos 189 | 190 | # Sensitive or high-churn files: 191 | .idea/**/dataSources/ 192 | .idea/**/dataSources.ids 193 | .idea/**/dataSources.local.xml 194 | .idea/**/sqlDataSources.xml 195 | .idea/**/dynamic.xml 196 | .idea/**/uiDesigner.xml 197 | 198 | # Gradle: 199 | .idea/**/gradle.xml 200 | .idea/**/libraries 201 | 202 | # CMake 203 | cmake-build-debug/ 204 | cmake-build-release/ 205 | 206 | # Mongo Explorer plugin: 207 | .idea/**/mongoSettings.xml 208 | 209 | ## File-based project format: 210 | *.iws 211 | 212 | ## Plugin-specific files: 213 | 214 | # IntelliJ 215 | out/ 216 | 217 | # mpeltonen/sbt-idea plugin 218 | .idea_modules/ 219 | 220 | # JIRA plugin 221 | atlassian-ide-plugin.xml 222 | 223 | # Cursive Clojure plugin 224 | .idea/replstate.xml 225 | 226 | # Crashlytics plugin (for Android Studio and IntelliJ) 227 | com_crashlytics_export_strings.xml 228 | crashlytics.properties 229 | crashlytics-build.properties 230 | fabric.properties 231 | 232 | CLIP-repo/ 233 | clip/ 234 | 235 | pytorch-YOLOv4/ 236 | yolov4.weights 237 | yolov4.cfg -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Roboflow Object Tracking Example 2 | 3 | Object tracking using Roboflow Inference API and Zero-Shot (CLIP) Deep SORT. Read more in our 4 | [Zero-Shot Object Tracking announcement post](https://blog.roboflow.com/zero-shot-object-tracking/). 5 | 6 | ![Example fish tracking](https://user-images.githubusercontent.com/870796/130703648-8af62801-d66c-41f5-80ae-889301ae9b44.gif) 7 | 8 | Example object tracking courtesy of the [Roboflow Universe public Aquarium model and dataset](https://universe.roboflow.com/brad-dwyer/aquarium-combined). You can adapt this to your own dataset on Roboflow or any pre-trained model from [Roboflow Universe](https://universe.roboflow.com). 9 | 10 | # Overview 11 | 12 | Object tracking involves following individual objects of interest across frames. It 13 | combines the output of an [object detection](https://blog.roboflow.com/object-detection) model 14 | with a secondary algorithm to determine which detections are identifying "the same" 15 | object over time. 16 | 17 | Previously, this required training a special classification model to differentiate 18 | the instances of each different class. In this repository, we have used 19 | [OpenAI's CLIP zero-shot image classifier](https://blog.roboflow.com/clip-model-eli5-beginner-guide/) 20 | to create a universal object tracking repository. All you need is a trained object 21 | detection model and CLIP handles the instance identification for the object tracking 22 | algorithm. 23 | 24 | # Getting Started 25 | 26 | Colab Tutorial Here: 27 | 28 | Open In Colab 29 | 30 | ## Training your model 31 | 32 | To use the Roboflow Inference API as your detection engine: 33 | 34 | Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train). 35 | Your model will be hosted on an inference URL. 
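The tracker consumes that hosted model through its inference URL (passed with the `--url` flag shown under Run with Roboflow below). As an illustration of the URL shape only, with placeholder model ID and version rather than real values:

```
https://detect.roboflow.com/<model-id>/<version>
```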
36 | 37 | To use YOLOv7 as your detection engine: 38 | 39 | Follow Roboflow's [Train YOLOv7 on Custom Data Tutorial](https://blog.roboflow.com/yolov7-custom-dataset-training-tutorial/) 40 | 41 | The YOLOv7 implementation uses [this colab notebook](https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa) 42 | 43 | To use YOLOv5 as your detection engine: 44 | 45 | Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/) 46 | 47 | The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ) 48 | 49 | The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6` 50 | 51 | ## Performing Object Tracking 52 | 53 | ### Clone repositories 54 | 55 | ``` 56 | git clone https://github.com/roboflow-ai/zero-shot-object-tracking 57 | cd zero-shot-object-tracking 58 | git clone https://github.com/openai/CLIP.git CLIP-repo 59 | cp -r ./CLIP-repo/clip ./clip # Unix based 60 | robocopy CLIP-repo/clip clip\ # Windows (run instead of the cp command above) 61 | ``` 62 | 63 | ### Install requirements (python 3.7+) 64 | 65 | ```bash 66 | pip install --upgrade pip 67 | pip install -r requirements.txt 68 | ``` 69 | 70 | ### Install requirements (anaconda python 3.8) 71 | ``` 72 | conda install pytorch torchvision torchaudio -c pytorch 73 | conda install ftfy regex tqdm requests pandas seaborn 74 | pip install opencv-python pycocotools tensorflow 75 | ``` 76 | 77 | ### Run with Roboflow 78 | 79 | ```bash 80 | 81 | python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.roboflow.com/playing-cards-ow27d/1 --api_key ROBOFLOW_API_KEY --info 82 | ``` 83 | 84 | **NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com). 85 | 86 | ### Run with YOLOv7 87 | ```bash 88 | 89 | python clip_object_tracker.py --weights models/yolov7.pt --source data/video/fish.mp4 --detection-engine yolov7 --info 90 | ``` 91 | 92 | ### Run with YOLOv5 93 | ```bash 94 | 95 | python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info 96 | ``` 97 | 98 | ### Run with YOLOv4 99 | To use YOLOv4 for object detection you will need pretrained weights (.weights file), a model config for your weights (.cfg), and a class names file (.names). Test weights can be found here https://github.com/AlexeyAB/darknet. [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg) 100 | ``` 101 | python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info 102 | ``` 103 | (by default, output will be in runs/detect/exp[num]) 104 | 105 |
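The engine-specific commands above can be combined with the detection and tracking flags documented under Help below. The following is an illustrative sketch only; the threshold and budget values are arbitrary examples, not recommended defaults:

```bash
python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/cars.mp4 \
    --detection-engine yolov5 --confidence 0.4 --overlap 0.3 \
    --max_cosine_distance 0.2 --nn_budget 100 --save-txt --info
```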
106 | 109 |
110 | 111 | Help 112 | 113 | ```bash 114 | python clip_object_tracker.py -h 115 | ``` 116 | ``` 117 | --weights WEIGHTS [WEIGHTS ...] model.pt path(s) 118 | --source SOURCE source (video/image) 119 | --img-size IMG_SIZE inference size (pixels) 120 | --confidence CONFIDENCE object confidence threshold 121 | --overlap OVERLAP IOU threshold for NMS 122 | --thickness THICKNESS Thickness of the bounding box strokes 123 | --device DEVICE cuda device, i.e. 0 or 0,1,2,3 or cpu 124 | --view-img display results 125 | --save-txt save results to *.txt 126 | --save-conf save confidences in --save-txt labels 127 | --classes CLASSES [CLASSES ...] filter by class: --class 0, or --class 0 2 3 128 | --agnostic-nms class-agnostic NMS 129 | --augment augmented inference 130 | --update update all models 131 | --project PROJECT save results to project/name 132 | --name NAME save results to project/name 133 | --exist-ok existing project/name ok, do not increment 134 | --nms_max_overlap Non-maxima suppression threshold: Maximum detection overlap. 135 | --max_cosine_distance Gating threshold for cosine distance metric (object appearance). 136 | --nn_budget NN_BUDGET Maximum size of the appearance descriptors gallery. If None, no budget is enforced. 137 | --api_key API_KEY Roboflow API Key. 138 | --url URL Roboflow Model URL. 139 | --info Print debugging info. 140 | --detection-engine Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow). 141 | ``` 142 | ## Acknowledgements 143 | 144 | Huge thanks to: 145 | 146 | - [yolov4-deepsort by theAIGuysCode](https://github.com/theAIGuysCode/yolov4-deepsort) 147 | - [yolov5 by ultralytics](https://github.com/ultralytics/yolov5) 148 | - [yolov7 by WongKinYiu](https://github.com/WongKinYiu/yolov7) 149 | - [Deep SORT Repository by nwojke](https://github.com/nwojke/deep_sort) 150 | - [OpenAI for being awesome](https://openai.com/blog/clip/) 151 | -------------------------------------------------------------------------------- /coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /cv.yml: -------------------------------------------------------------------------------- 1 | name: cv 2 | 3 | channels: 4 | - conda-forge 5 | 6 | dependencies: 7 | - python==3.8.6 8 | - pip 9 | - cython 10 | - matplotlib>=3.2.2 11 | - numpy>=1.18.5 12 | - PyYAML>=5.3 13 | - scipy>=1.4.1 14 | - tensorboard>=2.2 15 | - torchvision>=0.8.1 16 | - 
tqdm>=4.41.0 17 | - requests==2.26.0 18 | - pandas==1.3.2 19 | - seaborn>=0.11.0 20 | - ftfy==6.0.3 21 | - pillow 22 | - opencv 23 | - regex 24 | - pip: 25 | - lxml 26 | - torch>=1.7.0 -------------------------------------------------------------------------------- /data/coco.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org 2 | # Train command: python train.py --data coco.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: bash data/scripts/get_coco.sh 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco/train2017.txt # 118287 images 14 | val: ../coco/val2017.txt # 5000 images 15 | test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794 16 | 17 | # number of classes 18 | nc: 80 19 | 20 | # class names 21 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 22 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 23 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 24 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 25 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 26 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 27 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 28 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 29 | 'hair drier', 'toothbrush'] 30 | 31 | # Print classes 32 | # with open('data/coco.yaml') as f: 33 | # d = yaml.load(f, Loader=yaml.FullLoader) # dict 34 | # for i, x in enumerate(d['names']): 35 | # print(i, x) 36 | -------------------------------------------------------------------------------- /data/coco128.yaml: -------------------------------------------------------------------------------- 1 | # COCO 2017 dataset http://cocodataset.org - first 128 training images 2 | # Train command: python train.py --data coco128.yaml 3 | # Default dataset location is next to /yolov5: 4 | # /parent_folder 5 | # /coco128 6 | # /yolov5 7 | 8 | 9 | # download command/URL (optional) 10 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip 11 | 12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/] 13 | train: ../coco128/images/train2017/ # 128 images 14 | val: ../coco128/images/train2017/ # 128 images 15 | 16 | # number of classes 17 | nc: 80 18 | 19 | # class names 20 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 21 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 22 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 23 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 24 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 
'banana', 'apple', 25 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 26 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 27 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 28 | 'hair drier', 'toothbrush'] 29 | -------------------------------------------------------------------------------- /data/hyp.finetune.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for VOC finetuning 2 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | # Hyperparameter Evolution Results 7 | # Generations: 306 8 | # P R mAP.5 mAP.5:.95 box obj cls 9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146 10 | 11 | lr0: 0.0032 12 | lrf: 0.12 13 | momentum: 0.843 14 | weight_decay: 0.00036 15 | warmup_epochs: 2.0 16 | warmup_momentum: 0.5 17 | warmup_bias_lr: 0.05 18 | box: 0.0296 19 | cls: 0.243 20 | cls_pw: 0.631 21 | obj: 0.301 22 | obj_pw: 0.911 23 | iou_t: 0.2 24 | anchor_t: 2.91 25 | # anchors: 3.63 26 | fl_gamma: 0.0 27 | hsv_h: 0.0138 28 | hsv_s: 0.664 29 | hsv_v: 0.464 30 | degrees: 0.373 31 | translate: 0.245 32 | scale: 0.898 33 | shear: 0.602 34 | perspective: 0.0 35 | flipud: 0.00856 36 | fliplr: 0.5 37 | mosaic: 1.0 38 | mixup: 0.243 39 | -------------------------------------------------------------------------------- /data/hyp.scratch.yaml: -------------------------------------------------------------------------------- 1 | # Hyperparameters for COCO training from scratch 2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300 3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials 4 | 5 | 6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3) 7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf) 8 | momentum: 0.937 # SGD momentum/Adam beta1 9 | weight_decay: 0.0005 # optimizer weight decay 5e-4 10 | warmup_epochs: 3.0 # warmup epochs (fractions ok) 11 | warmup_momentum: 0.8 # warmup initial momentum 12 | warmup_bias_lr: 0.1 # warmup initial bias lr 13 | box: 0.05 # box loss gain 14 | cls: 0.5 # cls loss gain 15 | cls_pw: 1.0 # cls BCELoss positive_weight 16 | obj: 1.0 # obj loss gain (scale with pixels) 17 | obj_pw: 1.0 # obj BCELoss positive_weight 18 | iou_t: 0.20 # IoU training threshold 19 | anchor_t: 4.0 # anchor-multiple threshold 20 | # anchors: 3 # anchors per output layer (0 to ignore) 21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5) 22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction) 23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction) 24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction) 25 | degrees: 0.0 # image rotation (+/- deg) 26 | translate: 0.1 # image translation (+/- fraction) 27 | scale: 0.5 # image scale (+/- gain) 28 | shear: 0.0 # image shear (+/- deg) 29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001 30 | flipud: 0.0 # image flip up-down (probability) 31 | fliplr: 0.5 # image flip left-right (probability) 32 | mosaic: 1.0 # image mosaic (probability) 33 | mixup: 0.0 # image mixup (probability) 34 | -------------------------------------------------------------------------------- /data/images/bus.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/bus.jpg -------------------------------------------------------------------------------- /data/images/zidane.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/zidane.jpg -------------------------------------------------------------------------------- /data/scripts/get_coco.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # COCO 2017 dataset http://cocodataset.org 3 | # Download command: bash data/scripts/get_coco.sh 4 | # Train command: python train.py --data coco.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /coco 8 | # /yolov5 9 | 10 | # Download/unzip labels 11 | d='../' # unzip directory 12 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 13 | f='coco2017labels.zip' # 68 MB 14 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 15 | 16 | # Download/unzip images 17 | d='../coco/images' # unzip directory 18 | url=http://images.cocodataset.org/zips/ 19 | f1='train2017.zip' # 19G, 118k images 20 | f2='val2017.zip' # 1G, 5k images 21 | f3='test2017.zip' # 7G, 41k images (optional) 22 | for f in $f1 $f2; do 23 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 24 | done 25 | -------------------------------------------------------------------------------- /data/scripts/get_voc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/ 3 | # Download command: bash data/scripts/get_voc.sh 4 | # Train command: python train.py --data voc.yaml 5 | # Default dataset location is next to /yolov5: 6 | # /parent_folder 7 | # /VOC 8 | # /yolov5 9 | 10 | start=$(date +%s) 11 | mkdir -p ../tmp 12 | cd ../tmp/ 13 | 14 | # Download/unzip images and labels 15 | d='.' # unzip directory 16 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/ 17 | f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images 18 | f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images 19 | f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images 20 | for f in $f1 $f2 $f3; do 21 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove 22 | done 23 | 24 | end=$(date +%s) 25 | runtime=$((end - start)) 26 | echo "Completed in" $runtime "seconds" 27 | 28 | echo "Splitting dataset..." 29 | python3 - "$@" <train.txt 89 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt 90 | 91 | python3 - "$@" < 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 
105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 
206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | 59 | # Start of Edited Block by Maxwell Stone 60 | cost_matrix = np.nan_to_num(cost_matrix, copy=True, nan=0.0, posinf=None, neginf=None) 61 | # This code is not from the original DeepSORT algorithm and should be considered if there are tracking issues. 
62 | # This line replaces Nan values, caused by incorrect CLIP detections, with 0's to stop from crashing. 63 | # **Warning. Issues may arise from this config. It is not completely tested. 64 | # End of Edited Block by Maxwell Stone 65 | 66 | indices = linear_sum_assignment(cost_matrix) 67 | indices = np.asarray(indices) 68 | indices = np.transpose(indices) 69 | matches, unmatched_tracks, unmatched_detections = [], [], [] 70 | for col, detection_idx in enumerate(detection_indices): 71 | if col not in indices[:, 1]: 72 | unmatched_detections.append(detection_idx) 73 | for row, track_idx in enumerate(track_indices): 74 | if row not in indices[:, 0]: 75 | unmatched_tracks.append(track_idx) 76 | for row, col in indices: 77 | track_idx = track_indices[row] 78 | detection_idx = detection_indices[col] 79 | if cost_matrix[row, col] > max_distance: 80 | unmatched_tracks.append(track_idx) 81 | unmatched_detections.append(detection_idx) 82 | else: 83 | matches.append((track_idx, detection_idx)) 84 | return matches, unmatched_tracks, unmatched_detections 85 | 86 | 87 | def matching_cascade( 88 | distance_metric, max_distance, cascade_depth, tracks, detections, 89 | track_indices=None, detection_indices=None): 90 | """Run matching cascade. 91 | 92 | Parameters 93 | ---------- 94 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 95 | The distance metric is given a list of tracks and detections as well as 96 | a list of N track indices and M detection indices. The metric should 97 | return the NxM dimensional cost matrix, where element (i, j) is the 98 | association cost between the i-th track in the given track indices and 99 | the j-th detection in the given detection indices. 100 | max_distance : float 101 | Gating threshold. Associations with cost larger than this value are 102 | disregarded. 103 | cascade_depth: int 104 | The cascade depth, should be set to the maximum track age. 105 | tracks : List[track.Track] 106 | A list of predicted tracks at the current time step. 107 | detections : List[detection.Detection] 108 | A list of detections at the current time step. 109 | track_indices : Optional[List[int]] 110 | List of track indices that maps rows in `cost_matrix` to tracks in 111 | `tracks` (see description above). Defaults to all tracks. 112 | detection_indices : Optional[List[int]] 113 | List of detection indices that maps columns in `cost_matrix` to 114 | detections in `detections` (see description above). Defaults to all 115 | detections. 116 | 117 | Returns 118 | ------- 119 | (List[(int, int)], List[int], List[int]) 120 | Returns a tuple with the following three entries: 121 | * A list of matched track and detection indices. 122 | * A list of unmatched track indices. 123 | * A list of unmatched detection indices.
124 | 125 | """ 126 | if track_indices is None: 127 | track_indices = list(range(len(tracks))) 128 | if detection_indices is None: 129 | detection_indices = list(range(len(detections))) 130 | 131 | unmatched_detections = detection_indices 132 | matches = [] 133 | for level in range(cascade_depth): 134 | if len(unmatched_detections) == 0: # No detections left 135 | break 136 | 137 | track_indices_l = [ 138 | k for k in track_indices 139 | if tracks[k].time_since_update == 1 + level 140 | ] 141 | if len(track_indices_l) == 0: # Nothing to match at this level 142 | continue 143 | 144 | matches_l, _, unmatched_detections = \ 145 | min_cost_matching( 146 | distance_metric, max_distance, tracks, detections, 147 | track_indices_l, unmatched_detections) 148 | matches += matches_l 149 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 150 | return matches, unmatched_tracks, unmatched_detections 151 | 152 | 153 | def gate_cost_matrix( 154 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 155 | gated_cost=INFTY_COST, only_position=False): 156 | """Invalidate infeasible entries in cost matrix based on the state 157 | distributions obtained by Kalman filtering. 158 | 159 | Parameters 160 | ---------- 161 | kf : The Kalman filter. 162 | cost_matrix : ndarray 163 | The NxM dimensional cost matrix, where N is the number of track indices 164 | and M is the number of detection indices, such that entry (i, j) is the 165 | association cost between `tracks[track_indices[i]]` and 166 | `detections[detection_indices[j]]`. 167 | tracks : List[track.Track] 168 | A list of predicted tracks at the current time step. 169 | detections : List[detection.Detection] 170 | A list of detections at the current time step. 171 | track_indices : List[int] 172 | List of track indices that maps rows in `cost_matrix` to tracks in 173 | `tracks` (see description above). 174 | detection_indices : List[int] 175 | List of detection indices that maps columns in `cost_matrix` to 176 | detections in `detections` (see description above). 177 | gated_cost : Optional[float] 178 | Entries in the cost matrix corresponding to infeasible associations are 179 | set this value. Defaults to a very large value. 180 | only_position : Optional[bool] 181 | If True, only the x, y position of the state distribution is considered 182 | during gating. Defaults to False. 183 | 184 | Returns 185 | ------- 186 | ndarray 187 | Returns the modified cost matrix. 188 | 189 | """ 190 | gating_dim = 2 if only_position else 4 191 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 192 | measurements = np.asarray( 193 | [detections[i].to_xyah() for i in detection_indices]) 194 | for row, track_idx in enumerate(track_indices): 195 | track = tracks[track_idx] 196 | gating_distance = kf.gating_distance( 197 | track.mean, track.covariance, measurements, only_position) 198 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 199 | return cost_matrix 200 | -------------------------------------------------------------------------------- /deep_sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that element (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to length 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that element (i, j) 48 | contains the cosine distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far.
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> classes = [d.classes for d in detections] 19 | >>> scores = [d.confidence for d in detections] 20 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 21 | >>> detections = [detections[i] for i in indices] 22 | 23 | Parameters 24 | ---------- 25 | boxes : ndarray 26 | Array of ROIs (x, y, width, height). 27 | max_bbox_overlap : float 28 | ROIs that overlap more than this values are suppressed. 29 | scores : Optional[array_like] 30 | Detector confidence score. 31 | 32 | Returns 33 | ------- 34 | List[int] 35 | Returns indices of detections that have survived non-maxima suppression. 
36 | 37 | """ 38 | if len(boxes) == 0: 39 | return [] 40 | 41 | boxes = boxes.astype(np.float) 42 | pick = [] 43 | 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] + boxes[:, 0] 47 | y2 = boxes[:, 3] + boxes[:, 1] 48 | 49 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 50 | if scores is not None: 51 | idxs = np.argsort(scores) 52 | else: 53 | idxs = np.argsort(y2) 54 | 55 | while len(idxs) > 0: 56 | last = len(idxs) - 1 57 | i = idxs[last] 58 | pick.append(i) 59 | 60 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 61 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 62 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 63 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 64 | 65 | w = np.maximum(0, xx2 - xx1 + 1) 66 | h = np.maximum(0, yy2 - yy1 + 1) 67 | 68 | overlap = (w * h) / area[idxs[:last]] 69 | 70 | idxs = np.delete( 71 | idxs, np.concatenate( 72 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deep_sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None, class_num=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | self.class_num = class_num 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def predict(self, kf): 114 | """Propagate the state distribution to the current time step using a 115 | Kalman filter prediction step. 116 | 117 | Parameters 118 | ---------- 119 | kf : kalman_filter.KalmanFilter 120 | The Kalman filter. 121 | 122 | """ 123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 124 | self.age += 1 125 | self.time_since_update += 1 126 | 127 | def update(self, kf, detection): 128 | """Perform Kalman filter measurement update step and update the feature 129 | cache. 130 | 131 | Parameters 132 | ---------- 133 | kf : kalman_filter.KalmanFilter 134 | The Kalman filter. 135 | detection : Detection 136 | The associated detection. 137 | 138 | """ 139 | self.mean, self.covariance = kf.update( 140 | self.mean, self.covariance, detection.to_xyah()) 141 | self.features.append(detection.feature) 142 | 143 | self.hits += 1 144 | self.time_since_update = 0 145 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 146 | self.state = TrackState.Confirmed 147 | 148 | def mark_missed(self): 149 | """Mark this track as missed (no association at the current time step). 150 | """ 151 | if self.state == TrackState.Tentative: 152 | self.state = TrackState.Deleted 153 | elif self.time_since_update > self._max_age: 154 | self.state = TrackState.Deleted 155 | 156 | def is_tentative(self): 157 | """Returns True if this track is tentative (unconfirmed). 158 | """ 159 | return self.state == TrackState.Tentative 160 | 161 | def is_confirmed(self): 162 | """Returns True if this track is confirmed.""" 163 | return self.state == TrackState.Confirmed 164 | 165 | def is_deleted(self): 166 | """Returns True if this track is dead and should be deleted.""" 167 | return self.state == TrackState.Deleted 168 | -------------------------------------------------------------------------------- /deep_sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 
18 | max_age : int 19 | Maximum number of consecutive misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of consecutive misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=60, n_init=3): 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | def update(self, detections): 59 | """Perform measurement update and track management. 60 | 61 | Parameters 62 | ---------- 63 | detections : List[deep_sort.detection.Detection] 64 | A list of detections at the current time step. 65 | 66 | """ 67 | # Run matching cascade. 68 | matches, unmatched_tracks, unmatched_detections = \ 69 | self._match(detections) 70 | 71 | # Update track set. 72 | for track_idx, detection_idx in matches: 73 | self.tracks[track_idx].update( 74 | self.kf, detections[detection_idx]) 75 | for track_idx in unmatched_tracks: 76 | self.tracks[track_idx].mark_missed() 77 | for detection_idx in unmatched_detections: 78 | self._initiate_track(detections[detection_idx]) 79 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 80 | 81 | # Update distance metric. 82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 83 | features, targets = [], [] 84 | for track in self.tracks: 85 | if not track.is_confirmed(): 86 | continue 87 | features += track.features 88 | targets += [track.track_id for _ in track.features] 89 | track.features = [] 90 | self.metric.partial_fit( 91 | np.asarray(features), np.asarray(targets), active_targets) 92 | 93 | def _match(self, detections): 94 | 95 | def gated_metric(tracks, dets, track_indices, detection_indices): 96 | features = np.array([dets[i].feature for i in detection_indices]) 97 | targets = np.array([tracks[i].track_id for i in track_indices]) 98 | cost_matrix = self.metric.distance(features, targets) 99 | cost_matrix = linear_assignment.gate_cost_matrix( 100 | self.kf, cost_matrix, tracks, dets, track_indices, 101 | detection_indices) 102 | 103 | return cost_matrix 104 | 105 | # Split track set into confirmed and unconfirmed tracks. 106 | confirmed_tracks = [ 107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 108 | unconfirmed_tracks = [ 109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 110 | 111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \ 113 | linear_assignment.matching_cascade( 114 | gated_metric, self.metric.matching_threshold, self.max_age, 115 | self.tracks, detections, confirmed_tracks) 116 | 117 | # Associate remaining tracks together with unconfirmed tracks using IOU. 118 | iou_track_candidates = unconfirmed_tracks + [ 119 | k for k in unmatched_tracks_a if 120 | self.tracks[k].time_since_update == 1] 121 | unmatched_tracks_a = [ 122 | k for k in unmatched_tracks_a if 123 | self.tracks[k].time_since_update != 1] 124 | matches_b, unmatched_tracks_b, unmatched_detections = \ 125 | linear_assignment.min_cost_matching( 126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 127 | detections, iou_track_candidates, unmatched_detections) 128 | 129 | matches = matches_a + matches_b 130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 131 | return matches, unmatched_tracks, unmatched_detections 132 | 133 | def _initiate_track(self, detection): 134 | mean, covariance = self.kf.initiate(detection.to_xyah()) 135 | class_num = detection.class_num 136 | self.tracks.append(Track( 137 | mean, covariance, self._next_id, self.n_init, self.max_age, 138 | detection.feature, class_num)) 139 | self._next_id += 1 140 | -------------------------------------------------------------------------------- /example/video/fish.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/example/video/fish.mp4 -------------------------------------------------------------------------------- /model_data/mars-small128.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/model_data/mars-small128.pb -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/models/__init__.py -------------------------------------------------------------------------------- /models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, 
requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.cat(y, 1) # nms ensemble 109 | y = torch.stack(y).mean(0) # mean ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return 
model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' 
% onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t)) 98 | -------------------------------------------------------------------------------- /models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # 
P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /models/hub/yolov3.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, 
Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, 
SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | 
[-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | 3 | # base ---------------------------------------- 4 | Cython 5 | matplotlib>=3.2.2 6 | numpy>=1.18.5 7 | opencv-python>=4.1.2 8 | Pillow 9 | PyYAML>=5.3 10 | scipy>=1.4.1 11 | tensorboard>=2.2 12 | torch>=1.7.0 13 | torchvision>=0.8.1 14 | tqdm>=4.41.0 15 | requests==2.26.0 16 | pyyaml==5.4.1 17 | pandas==1.3.2 18 | 19 | # plotting ------------------------------------ 20 | seaborn>=0.11.0 21 | pandas 22 | 23 | # clip 24 | ftfy==6.0.3 25 | regex==2.5.86 -------------------------------------------------------------------------------- /tool/region_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from 
tool.torch_utils import * 4 | 5 | 6 | def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale, 7 | sil_thresh, seen): 8 | nB = target.size(0) 9 | nA = num_anchors 10 | nC = num_classes 11 | anchor_step = len(anchors) / num_anchors 12 | conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale 13 | coord_mask = torch.zeros(nB, nA, nH, nW) 14 | cls_mask = torch.zeros(nB, nA, nH, nW) 15 | tx = torch.zeros(nB, nA, nH, nW) 16 | ty = torch.zeros(nB, nA, nH, nW) 17 | tw = torch.zeros(nB, nA, nH, nW) 18 | th = torch.zeros(nB, nA, nH, nW) 19 | tconf = torch.zeros(nB, nA, nH, nW) 20 | tcls = torch.zeros(nB, nA, nH, nW) 21 | 22 | nAnchors = nA * nH * nW 23 | nPixels = nH * nW 24 | for b in range(nB): 25 | cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t() 26 | cur_ious = torch.zeros(nAnchors) 27 | for t in range(50): 28 | if target[b][t * 5 + 1] == 0: 29 | break 30 | gx = target[b][t * 5 + 1] * nW 31 | gy = target[b][t * 5 + 2] * nH 32 | gw = target[b][t * 5 + 3] * nW 33 | gh = target[b][t * 5 + 4] * nH 34 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t() 35 | cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False)) 36 | conf_mask[b][cur_ious > sil_thresh] = 0 37 | if seen < 12800: 38 | if anchor_step == 4: 39 | tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1, 40 | 1).repeat( 41 | nB, 1, nH, nW) 42 | ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view( 43 | 1, nA, 1, 1).repeat(nB, 1, nH, nW) 44 | else: 45 | tx.fill_(0.5) 46 | ty.fill_(0.5) 47 | tw.zero_() 48 | th.zero_() 49 | coord_mask.fill_(1) 50 | 51 | nGT = 0 52 | nCorrect = 0 53 | for b in range(nB): 54 | for t in range(50): 55 | if target[b][t * 5 + 1] == 0: 56 | break 57 | nGT = nGT + 1 58 | best_iou = 0.0 59 | best_n = -1 60 | min_dist = 10000 61 | gx = target[b][t * 5 + 1] * nW 62 | gy = target[b][t * 5 + 2] * nH 63 | gi = int(gx) 64 | gj = int(gy) 65 | gw = target[b][t * 5 + 3] * nW 66 | gh = target[b][t * 5 + 4] * nH 67 | gt_box = [0, 0, gw, gh] 68 | for n in range(nA): 69 | aw = anchors[anchor_step * n] 70 | ah = anchors[anchor_step * n + 1] 71 | anchor_box = [0, 0, aw, ah] 72 | iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False) 73 | if anchor_step == 4: 74 | ax = anchors[anchor_step * n + 2] 75 | ay = anchors[anchor_step * n + 3] 76 | dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2) 77 | if iou > best_iou: 78 | best_iou = iou 79 | best_n = n 80 | elif anchor_step == 4 and iou == best_iou and dist < min_dist: 81 | best_iou = iou 82 | best_n = n 83 | min_dist = dist 84 | 85 | gt_box = [gx, gy, gw, gh] 86 | pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi] 87 | 88 | coord_mask[b][best_n][gj][gi] = 1 89 | cls_mask[b][best_n][gj][gi] = 1 90 | conf_mask[b][best_n][gj][gi] = object_scale 91 | tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi 92 | ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj 93 | tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n]) 94 | th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1]) 95 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou 96 | tconf[b][best_n][gj][gi] = iou 97 | tcls[b][best_n][gj][gi] = target[b][t * 5] 98 | if iou > 0.5: 99 | nCorrect = nCorrect + 1 100 | 101 | return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls 102 | 103 | 104 | class 
RegionLoss(nn.Module): 105 | def __init__(self, num_classes=0, anchors=[], num_anchors=1): 106 | super(RegionLoss, self).__init__() 107 | self.num_classes = num_classes 108 | self.anchors = anchors 109 | self.num_anchors = num_anchors 110 | self.anchor_step = len(anchors) / num_anchors 111 | self.coord_scale = 1 112 | self.noobject_scale = 1 113 | self.object_scale = 5 114 | self.class_scale = 1 115 | self.thresh = 0.6 116 | self.seen = 0 117 | 118 | def forward(self, output, target): 119 | # output : BxAs*(4+1+num_classes)*H*W 120 | t0 = time.time() 121 | nB = output.data.size(0) 122 | nA = self.num_anchors 123 | nC = self.num_classes 124 | nH = output.data.size(2) 125 | nW = output.data.size(3) 126 | 127 | output = output.view(nB, nA, (5 + nC), nH, nW) 128 | x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW)) 129 | y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW)) 130 | w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW) 131 | h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW) 132 | conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW)) 133 | cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())) 134 | cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC) 135 | t1 = time.time() 136 | 137 | pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW) 138 | grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda() 139 | grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda() 140 | anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda() 141 | anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda() 142 | anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW) 143 | anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW) 144 | pred_boxes[0] = x.data + grid_x 145 | pred_boxes[1] = y.data + grid_y 146 | pred_boxes[2] = torch.exp(w.data) * anchor_w 147 | pred_boxes[3] = torch.exp(h.data) * anchor_h 148 | pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4)) 149 | t2 = time.time() 150 | 151 | nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes, 152 | target.data, 153 | self.anchors, nA, 154 | nC, \ 155 | nH, nW, 156 | self.noobject_scale, 157 | self.object_scale, 158 | self.thresh, 159 | self.seen) 160 | cls_mask = (cls_mask == 1) 161 | nProposals = int((conf > 0.25).sum().data[0]) 162 | 163 | tx = Variable(tx.cuda()) 164 | ty = Variable(ty.cuda()) 165 | tw = Variable(tw.cuda()) 166 | th = Variable(th.cuda()) 167 | tconf = Variable(tconf.cuda()) 168 | tcls = Variable(tcls.view(-1)[cls_mask].long().cuda()) 169 | 170 | coord_mask = Variable(coord_mask.cuda()) 171 | conf_mask = Variable(conf_mask.cuda().sqrt()) 172 | cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda()) 173 | cls = cls[cls_mask].view(-1, nC) 174 | 175 | t3 = time.time() 176 | 177 | loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0 178 | loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0 179 | loss_w = self.coord_scale * 
nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0 180 | loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0 181 | loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0 182 | loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls) 183 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls 184 | t4 = time.time() 185 | if False: 186 | print('-----------------------------------') 187 | print(' activation : %f' % (t1 - t0)) 188 | print(' create pred_boxes : %f' % (t2 - t1)) 189 | print(' build targets : %f' % (t3 - t2)) 190 | print(' create loss : %f' % (t4 - t3)) 191 | print(' total : %f' % (t4 - t0)) 192 | print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % ( 193 | self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0], 194 | loss_conf.data[0], loss_cls.data[0], loss.data[0])) 195 | return loss 196 | -------------------------------------------------------------------------------- /tool/torch_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import torch 6 | import numpy as np 7 | from torch.autograd import Variable 8 | 9 | import itertools 10 | import struct # get_image_size 11 | import imghdr # get_image_size 12 | 13 | from tool import utils 14 | 15 | 16 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True): 17 | if x1y1x2y2: 18 | mx = torch.min(boxes1[0], boxes2[0]) 19 | Mx = torch.max(boxes1[2], boxes2[2]) 20 | my = torch.min(boxes1[1], boxes2[1]) 21 | My = torch.max(boxes1[3], boxes2[3]) 22 | w1 = boxes1[2] - boxes1[0] 23 | h1 = boxes1[3] - boxes1[1] 24 | w2 = boxes2[2] - boxes2[0] 25 | h2 = boxes2[3] - boxes2[1] 26 | else: 27 | mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0) 28 | Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0) 29 | my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0) 30 | My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0) 31 | w1 = boxes1[2] 32 | h1 = boxes1[3] 33 | w2 = boxes2[2] 34 | h2 = boxes2[3] 35 | uw = Mx - mx 36 | uh = My - my 37 | cw = w1 + w2 - uw 38 | ch = h1 + h2 - uh 39 | mask = ((cw <= 0) + (ch <= 0) > 0) 40 | area1 = w1 * h1 41 | area2 = w2 * h2 42 | carea = cw * ch 43 | carea[mask] = 0 44 | uarea = area1 + area2 - carea 45 | return carea / uarea 46 | 47 | 48 | def get_region_boxes(boxes_and_confs): 49 | 50 | # print('Getting boxes from boxes and confs ...') 51 | 52 | boxes_list = [] 53 | confs_list = [] 54 | 55 | for item in boxes_and_confs: 56 | boxes_list.append(item[0]) 57 | confs_list.append(item[1]) 58 | 59 | # boxes: [batch, num1 + num2 + num3, 1, 4] 60 | # confs: [batch, num1 + num2 + num3, num_classes] 61 | boxes = torch.cat(boxes_list, dim=1) 62 | confs = torch.cat(confs_list, dim=1) 63 | 64 | return [boxes, confs] 65 | 66 | 67 | def convert2cpu(gpu_matrix): 68 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix) 69 | 70 | 71 | def convert2cpu_long(gpu_matrix): 72 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix) 73 | 74 | 75 | 76 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1): 77 | model.eval() 78 | t0 = time.time() 79 | 80 | if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image 81 | img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0) 82 | elif type(img) == 
np.ndarray and len(img.shape) == 4: 83 | img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0) 84 | else: 85 | print("unknow image type") 86 | exit(-1) 87 | 88 | if use_cuda: 89 | img = img.cuda() 90 | img = torch.autograd.Variable(img) 91 | 92 | t1 = time.time() 93 | 94 | output = model(img) 95 | 96 | t2 = time.time() 97 | 98 | print('-----------------------------------') 99 | print(' Preprocess : %f' % (t1 - t0)) 100 | print(' Model Inference : %f' % (t2 - t1)) 101 | print('-----------------------------------') 102 | 103 | return utils.post_processing(img, conf_thresh, nms_thresh, output) 104 | 105 | -------------------------------------------------------------------------------- /tool/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import time 4 | import math 5 | import numpy as np 6 | 7 | import itertools 8 | import struct # get_image_size 9 | import imghdr # get_image_size 10 | 11 | 12 | def sigmoid(x): 13 | return 1.0 / (np.exp(-x) + 1.) 14 | 15 | 16 | def softmax(x): 17 | x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1)) 18 | x = x / np.expand_dims(x.sum(axis=1), axis=1) 19 | return x 20 | 21 | 22 | def bbox_iou(box1, box2, x1y1x2y2=True): 23 | 24 | # print('iou box1:', box1) 25 | # print('iou box2:', box2) 26 | 27 | if x1y1x2y2: 28 | mx = min(box1[0], box2[0]) 29 | Mx = max(box1[2], box2[2]) 30 | my = min(box1[1], box2[1]) 31 | My = max(box1[3], box2[3]) 32 | w1 = box1[2] - box1[0] 33 | h1 = box1[3] - box1[1] 34 | w2 = box2[2] - box2[0] 35 | h2 = box2[3] - box2[1] 36 | else: 37 | w1 = box1[2] 38 | h1 = box1[3] 39 | w2 = box2[2] 40 | h2 = box2[3] 41 | 42 | mx = min(box1[0], box2[0]) 43 | Mx = max(box1[0] + w1, box2[0] + w2) 44 | my = min(box1[1], box2[1]) 45 | My = max(box1[1] + h1, box2[1] + h2) 46 | uw = Mx - mx 47 | uh = My - my 48 | cw = w1 + w2 - uw 49 | ch = h1 + h2 - uh 50 | carea = 0 51 | if cw <= 0 or ch <= 0: 52 | return 0.0 53 | 54 | area1 = w1 * h1 55 | area2 = w2 * h2 56 | carea = cw * ch 57 | uarea = area1 + area2 - carea 58 | return carea / uarea 59 | 60 | 61 | def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False): 62 | # print(boxes.shape) 63 | x1 = boxes[:, 0] 64 | y1 = boxes[:, 1] 65 | x2 = boxes[:, 2] 66 | y2 = boxes[:, 3] 67 | 68 | areas = (x2 - x1) * (y2 - y1) 69 | order = confs.argsort()[::-1] 70 | 71 | keep = [] 72 | while order.size > 0: 73 | idx_self = order[0] 74 | idx_other = order[1:] 75 | 76 | keep.append(idx_self) 77 | 78 | xx1 = np.maximum(x1[idx_self], x1[idx_other]) 79 | yy1 = np.maximum(y1[idx_self], y1[idx_other]) 80 | xx2 = np.minimum(x2[idx_self], x2[idx_other]) 81 | yy2 = np.minimum(y2[idx_self], y2[idx_other]) 82 | 83 | w = np.maximum(0.0, xx2 - xx1) 84 | h = np.maximum(0.0, yy2 - yy1) 85 | inter = w * h 86 | 87 | if min_mode: 88 | over = inter / np.minimum(areas[order[0]], areas[order[1:]]) 89 | else: 90 | over = inter / (areas[order[0]] + areas[order[1:]] - inter) 91 | 92 | inds = np.where(over <= nms_thresh)[0] 93 | order = order[inds + 1] 94 | 95 | return np.array(keep) 96 | 97 | 98 | 99 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None): 100 | import cv2 101 | img = np.copy(img) 102 | colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32) 103 | 104 | def get_color(c, x, max_val): 105 | ratio = float(x) / max_val * 5 106 | i = int(math.floor(ratio)) 107 | j = int(math.ceil(ratio)) 108 | ratio = ratio - i 109 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c] 
110 | return int(r * 255) 111 | 112 | width = img.shape[1] 113 | height = img.shape[0] 114 | for i in range(len(boxes)): 115 | box = boxes[i] 116 | x1 = int(box[0] * width) 117 | y1 = int(box[1] * height) 118 | x2 = int(box[2] * width) 119 | y2 = int(box[3] * height) 120 | 121 | if color: 122 | rgb = color 123 | else: 124 | rgb = (255, 0, 0) 125 | if len(box) >= 7 and class_names: 126 | cls_conf = box[5] 127 | cls_id = box[6] 128 | print('%s: %f' % (class_names[cls_id], cls_conf)) 129 | classes = len(class_names) 130 | offset = cls_id * 123457 % classes 131 | red = get_color(2, offset, classes) 132 | green = get_color(1, offset, classes) 133 | blue = get_color(0, offset, classes) 134 | if color is None: 135 | rgb = (red, green, blue) 136 | img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1) 137 | img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1) 138 | if savename: 139 | print("save plot results to %s" % savename) 140 | cv2.imwrite(savename, img) 141 | return img 142 | 143 | 144 | def read_truths(lab_path): 145 | if not os.path.exists(lab_path): 146 | return np.array([]) 147 | if os.path.getsize(lab_path): 148 | truths = np.loadtxt(lab_path) 149 | truths = truths.reshape(truths.size / 5, 5) # to avoid single truth problem 150 | return truths 151 | else: 152 | return np.array([]) 153 | 154 | 155 | def load_class_names(namesfile): 156 | class_names = [] 157 | with open(namesfile, 'r') as fp: 158 | lines = fp.readlines() 159 | for line in lines: 160 | line = line.rstrip() 161 | class_names.append(line) 162 | return class_names 163 | 164 | 165 | 166 | def post_processing(img, conf_thresh, nms_thresh, output): 167 | 168 | # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401] 169 | # num_anchors = 9 170 | # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]] 171 | # strides = [8, 16, 32] 172 | # anchor_step = len(anchors) // num_anchors 173 | 174 | # [batch, num, 1, 4] 175 | box_array = output[0] 176 | # [batch, num, num_classes] 177 | confs = output[1] 178 | 179 | t1 = time.time() 180 | 181 | if type(box_array).__name__ != 'ndarray': 182 | box_array = box_array.cpu().detach().numpy() 183 | confs = confs.cpu().detach().numpy() 184 | 185 | num_classes = confs.shape[2] 186 | 187 | # [batch, num, 4] 188 | box_array = box_array[:, :, 0] 189 | 190 | # [batch, num, num_classes] --> [batch, num] 191 | max_conf = np.max(confs, axis=2) 192 | max_id = np.argmax(confs, axis=2) 193 | 194 | t2 = time.time() 195 | 196 | bboxes_batch = [] 197 | for i in range(box_array.shape[0]): 198 | 199 | argwhere = max_conf[i] > conf_thresh 200 | l_box_array = box_array[i, argwhere, :] 201 | l_max_conf = max_conf[i, argwhere] 202 | l_max_id = max_id[i, argwhere] 203 | 204 | bboxes = [] 205 | # nms for each class 206 | for j in range(num_classes): 207 | 208 | cls_argwhere = l_max_id == j 209 | ll_box_array = l_box_array[cls_argwhere, :] 210 | ll_max_conf = l_max_conf[cls_argwhere] 211 | ll_max_id = l_max_id[cls_argwhere] 212 | 213 | keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh) 214 | 215 | if (keep.size > 0): 216 | ll_box_array = ll_box_array[keep, :] 217 | ll_max_conf = ll_max_conf[keep] 218 | ll_max_id = ll_max_id[keep] 219 | 220 | for k in range(ll_box_array.shape[0]): 221 | bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]]) 222 | 223 | bboxes_batch.append(bboxes) 224 | 225 | t3 = time.time() 226 | 227 | 
print('-----------------------------------') 228 | print(' max and argmax : %f' % (t2 - t1)) 229 | print(' nms : %f' % (t3 - t2)) 230 | print('Post processing total : %f' % (t3 - t1)) 231 | print('-----------------------------------') 232 | 233 | return bboxes_batch 234 | -------------------------------------------------------------------------------- /tool/utils_iou.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | 4 | ''' 5 | import torch 6 | import os, sys 7 | from torch.nn import functional as F 8 | 9 | import numpy as np 10 | from packaging import version 11 | 12 | 13 | __all__ = [ 14 | "bboxes_iou", 15 | "bboxes_giou", 16 | "bboxes_diou", 17 | "bboxes_ciou", 18 | ] 19 | 20 | 21 | if version.parse(torch.__version__) >= version.parse('1.5.0'): 22 | def _true_divide(dividend, divisor): 23 | return torch.true_divide(dividend, divisor) 24 | else: 25 | def _true_divide(dividend, divisor): 26 | return dividend / divisor 27 | 28 | def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'): 29 | """Calculate the Intersection of Unions (IoUs) between bounding boxes. 30 | IoU is calculated as a ratio of area of the intersection 31 | and area of the union. 32 | 33 | Args: 34 | bbox_a (array): An array whose shape is :math:`(N, 4)`. 35 | :math:`N` is the number of bounding boxes. 36 | The dtype should be :obj:`numpy.float32`. 37 | bbox_b (array): An array similar to :obj:`bbox_a`, 38 | whose shape is :math:`(K, 4)`. 39 | The dtype should be :obj:`numpy.float32`. 40 | Returns: 41 | array: 42 | An array whose shape is :math:`(N, K)`. \ 43 | An element at index :math:`(n, k)` contains IoUs between \ 44 | :math:`n` th bounding box in :obj:`bbox_a` and :math:`k` th bounding \ 45 | box in :obj:`bbox_b`. 
46 | 47 | from: https://github.com/chainer/chainercv 48 | """ 49 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: 50 | raise IndexError 51 | 52 | N, K = bboxes_a.shape[0], bboxes_b.shape[0] 53 | 54 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 55 | # top left 56 | tl_intersect = torch.max( 57 | bboxes_a[:, np.newaxis, :2], 58 | bboxes_b[:, :2] 59 | ) # of shape `(N,K,2)` 60 | # bottom right 61 | br_intersect = torch.min( 62 | bboxes_a[:, np.newaxis, 2:], 63 | bboxes_b[:, 2:] 64 | ) 65 | bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2] 66 | bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2] 67 | # bb_* can also be seen vectors representing box_width, box_height 68 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 69 | # top left 70 | tl_intersect = torch.max( 71 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2, 72 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2 73 | ) 74 | # bottom right 75 | br_intersect = torch.min( 76 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2, 77 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2 78 | ) 79 | bb_a = bboxes_a[:, 2:] 80 | bb_b = bboxes_b[:, 2:] 81 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 82 | # top left 83 | tl_intersect = torch.max( 84 | bboxes_a[:, np.newaxis, :2], 85 | bboxes_b[:, :2] 86 | ) 87 | # bottom right 88 | br_intersect = torch.min( 89 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:], 90 | bboxes_b[:, :2] + bboxes_b[:, 2:] 91 | ) 92 | bb_a = bboxes_a[:, 2:] 93 | bb_b = bboxes_b[:, 2:] 94 | 95 | area_a = torch.prod(bb_a, 1) 96 | area_b = torch.prod(bb_b, 1) 97 | 98 | # torch.prod(input, dim, keepdim=False, dtype=None) → Tensor 99 | # Returns the product of each row of the input tensor in the given dimension dim 100 | # if tl, br does not form a nondegenerate squre, then the corr. 
element in the `prod` would be 0 101 | en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)` 102 | 103 | area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all()) 104 | area_union = (area_a[:, np.newaxis] + area_b - area_intersect) 105 | 106 | iou = _true_divide(area_intersect, area_union) 107 | 108 | if iou_type.lower() == 'iou': 109 | return iou 110 | 111 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 112 | # top left 113 | tl_union = torch.min( 114 | bboxes_a[:, np.newaxis, :2], 115 | bboxes_b[:, :2] 116 | ) # of shape `(N,K,2)` 117 | # bottom right 118 | br_union = torch.max( 119 | bboxes_a[:, np.newaxis, 2:], 120 | bboxes_b[:, 2:] 121 | ) 122 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 123 | # top left 124 | tl_union = torch.min( 125 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2, 126 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2 127 | ) 128 | # bottom right 129 | br_union = torch.max( 130 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2, 131 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2 132 | ) 133 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 134 | # top left 135 | tl_union = torch.min( 136 | bboxes_a[:, np.newaxis, :2], 137 | bboxes_b[:, :2] 138 | ) 139 | # bottom right 140 | br_union = torch.max( 141 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:], 142 | bboxes_b[:, :2] + bboxes_b[:, 2:] 143 | ) 144 | 145 | # c for covering, of shape `(N,K,2)` 146 | # the last dim is box width, box hight 147 | bboxes_c = br_union - tl_union 148 | 149 | area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)` 150 | 151 | giou = iou - _true_divide(area_covering - area_union, area_covering) 152 | 153 | if iou_type.lower() == 'giou': 154 | return giou 155 | 156 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax 157 | centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2 158 | centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2 159 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h 160 | centre_a = bboxes_a[..., : 2] 161 | centre_b = bboxes_b[..., : 2] 162 | elif fmt.lower() == 'coco': # xmin, ymin, w, h 163 | centre_a = bboxes_a[..., 2 :] + bboxes_a[..., : 2]/2 164 | centre_b = bboxes_b[..., 2 :] + bboxes_b[..., : 2]/2 165 | 166 | centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2) 167 | diag_len = torch.norm(bboxes_c, p='fro', dim=2) 168 | 169 | diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2)) 170 | 171 | if iou_type.lower() == 'diou': 172 | return diou 173 | 174 | """ the legacy custom cosine similarity: 175 | 176 | # bb_a of shape `(N,2)`, bb_b of shape `(K,2)` 177 | v = torch.einsum('nm,km->nk', bb_a, bb_b) 178 | v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1))) 179 | # avoid nan for torch.acos near \pm 1 180 | # https://github.com/pytorch/pytorch/issues/8069 181 | eps = 1e-7 182 | v = torch.clamp(v, -1+eps, 1-eps) 183 | """ 184 | v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1) 185 | v = (_true_divide(2*torch.acos(v), np.pi)).pow(2) 186 | with torch.no_grad(): 187 | alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type())) 188 | 189 | ciou = diou - alpha * v 190 | 191 | if iou_type.lower() == 'ciou': 192 | return ciou 193 | 194 | 195 | def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'): 196 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou') 197 | 198 | 199 | def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'): 200 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 
'diou') 201 | 202 | 203 | def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'): 204 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou') 205 | -------------------------------------------------------------------------------- /tools/freeze_model.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import argparse 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def _batch_norm_fn(x, scope=None): 8 | if scope is None: 9 | scope = tf.get_variable_scope().name + "/bn" 10 | return slim.batch_norm(x, scope=scope) 11 | 12 | 13 | def create_link( 14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu, 15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3), 16 | regularizer=None, is_first=False, summarize_activations=True): 17 | if is_first: 18 | network = incoming 19 | else: 20 | network = _batch_norm_fn(incoming, scope=scope + "/bn") 21 | network = nonlinearity(network) 22 | if summarize_activations: 23 | tf.summary.histogram(scope+"/activations", network) 24 | 25 | pre_block_network = network 26 | post_block_network = network_builder(pre_block_network, scope) 27 | 28 | incoming_dim = pre_block_network.get_shape().as_list()[-1] 29 | outgoing_dim = post_block_network.get_shape().as_list()[-1] 30 | if incoming_dim != outgoing_dim: 31 | assert outgoing_dim == 2 * incoming_dim, \ 32 | "%d != %d" % (outgoing_dim, 2 * incoming) 33 | projection = slim.conv2d( 34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None, 35 | scope=scope+"/projection", weights_initializer=weights_initializer, 36 | biases_initializer=None, weights_regularizer=regularizer) 37 | network = projection + post_block_network 38 | else: 39 | network = incoming + post_block_network 40 | return network 41 | 42 | 43 | def create_inner_block( 44 | incoming, scope, nonlinearity=tf.nn.elu, 45 | weights_initializer=tf.truncated_normal_initializer(1e-3), 46 | bias_initializer=tf.zeros_initializer(), regularizer=None, 47 | increase_dim=False, summarize_activations=True): 48 | n = incoming.get_shape().as_list()[-1] 49 | stride = 1 50 | if increase_dim: 51 | n *= 2 52 | stride = 2 53 | 54 | incoming = slim.conv2d( 55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME", 56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer, 57 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 58 | scope=scope + "/1") 59 | if summarize_activations: 60 | tf.summary.histogram(incoming.name + "/activations", incoming) 61 | 62 | incoming = slim.dropout(incoming, keep_prob=0.6) 63 | 64 | incoming = slim.conv2d( 65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME", 66 | normalizer_fn=None, weights_initializer=weights_initializer, 67 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 68 | scope=scope + "/2") 69 | return incoming 70 | 71 | 72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu, 73 | weights_initializer=tf.truncated_normal_initializer(1e3), 74 | bias_initializer=tf.zeros_initializer(), regularizer=None, 75 | increase_dim=False, is_first=False, 76 | summarize_activations=True): 77 | 78 | def network_builder(x, s): 79 | return create_inner_block( 80 | x, s, nonlinearity, weights_initializer, bias_initializer, 81 | regularizer, increase_dim, summarize_activations) 82 | 83 | return create_link( 84 | incoming, network_builder, scope, nonlinearity, weights_initializer, 85 | regularizer, is_first, summarize_activations) 86 | 87 | 
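# --- Editorial usage sketch (not part of the original repository) ----------
# The frozen graph written by this script (for example
# model_data/mars-small128.pb) exposes the tensors created in main() below:
# a uint8 "images" placeholder of shape (None, 128, 64, 3) and a "features"
# output (128-dimensional for the mars-small128 checkpoint). The helper
# below is a minimal TF1-style loading sketch under those assumptions; the
# name `load_frozen_encoder` is illustrative only and is not used elsewhere
# in this codebase. It relies on the `import tensorflow as tf` at the top of
# this file.
def load_frozen_encoder(graphdef_path, session):
    """Import a frozen appearance graph and return a patch-batch encoder."""
    with tf.gfile.GFile(graphdef_path, "rb") as file_handle:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(file_handle.read())
    with session.graph.as_default():
        tf.import_graph_def(graph_def, name="net")
    input_var = session.graph.get_tensor_by_name("net/images:0")
    output_var = session.graph.get_tensor_by_name("net/features:0")
    # Feed uint8 patches of shape (N, 128, 64, 3); one feature row per patch.
    return lambda patches: session.run(output_var, feed_dict={input_var: patches})
# Typical call site (sketch), inside `with tf.Session(graph=tf.Graph()) as sess:`:
#     encode = load_frozen_encoder("model_data/mars-small128.pb", sess)
#     features = encode(patch_batch)  # patch_batch: uint8 ndarray, (N, 128, 64, 3)
# ----------------------------------------------------------------------------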
88 | def _create_network(incoming, reuse=None, weight_decay=1e-8): 89 | nonlinearity = tf.nn.elu 90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 91 | conv_bias_init = tf.zeros_initializer() 92 | conv_regularizer = slim.l2_regularizer(weight_decay) 93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 94 | fc_bias_init = tf.zeros_initializer() 95 | fc_regularizer = slim.l2_regularizer(weight_decay) 96 | 97 | def batch_norm_fn(x): 98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") 99 | 100 | network = incoming 101 | network = slim.conv2d( 102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", 104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 105 | weights_regularizer=conv_regularizer) 106 | network = slim.conv2d( 107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", 109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 110 | weights_regularizer=conv_regularizer) 111 | 112 | # NOTE(nwojke): This is missing a padding="SAME" to match the CNN 113 | # architecture in Table 1 of the paper. Information on how this affects 114 | # performance on MOT 16 training sequences can be found in 115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10 116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") 117 | 118 | network = residual_block( 119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, 120 | conv_regularizer, increase_dim=False, is_first=True) 121 | network = residual_block( 122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, 123 | conv_regularizer, increase_dim=False) 124 | 125 | network = residual_block( 126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, 127 | conv_regularizer, increase_dim=True) 128 | network = residual_block( 129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, 130 | conv_regularizer, increase_dim=False) 131 | 132 | network = residual_block( 133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, 134 | conv_regularizer, increase_dim=True) 135 | network = residual_block( 136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, 137 | conv_regularizer, increase_dim=False) 138 | 139 | feature_dim = network.get_shape().as_list()[-1] 140 | network = slim.flatten(network) 141 | 142 | network = slim.dropout(network, keep_prob=0.6) 143 | network = slim.fully_connected( 144 | network, feature_dim, activation_fn=nonlinearity, 145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, 146 | scope="fc1", weights_initializer=fc_weight_init, 147 | biases_initializer=fc_bias_init) 148 | 149 | features = network 150 | 151 | # Features in rows, normalize axis 1. 
152 | features = slim.batch_norm(features, scope="ball", reuse=reuse) 153 | feature_norm = tf.sqrt( 154 | tf.constant(1e-8, tf.float32) + 155 | tf.reduce_sum(tf.square(features), [1], keepdims=True)) 156 | features = features / feature_norm 157 | return features, None 158 | 159 | 160 | def _network_factory(weight_decay=1e-8): 161 | 162 | def factory_fn(image, reuse): 163 | with slim.arg_scope([slim.batch_norm, slim.dropout], 164 | is_training=False): 165 | with slim.arg_scope([slim.conv2d, slim.fully_connected, 166 | slim.batch_norm, slim.layer_norm], 167 | reuse=reuse): 168 | features, logits = _create_network( 169 | image, reuse=reuse, weight_decay=weight_decay) 170 | return features, logits 171 | 172 | return factory_fn 173 | 174 | 175 | def _preprocess(image): 176 | image = image[:, :, ::-1] # BGR to RGB 177 | return image 178 | 179 | 180 | def parse_args(): 181 | """Parse command line arguments. 182 | """ 183 | parser = argparse.ArgumentParser(description="Freeze old model") 184 | parser.add_argument( 185 | "--checkpoint_in", 186 | default="resources/networks/mars-small128.ckpt-68577", 187 | help="Path to checkpoint file") 188 | parser.add_argument( 189 | "--graphdef_out", 190 | default="resources/networks/mars-small128.pb") 191 | return parser.parse_args() 192 | 193 | 194 | def main(): 195 | args = parse_args() 196 | 197 | with tf.Session(graph=tf.Graph()) as session: 198 | input_var = tf.placeholder( 199 | tf.uint8, (None, 128, 64, 3), name="images") 200 | image_var = tf.map_fn( 201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32), 202 | back_prop=False) 203 | 204 | factory_fn = _network_factory() 205 | features, _ = factory_fn(image_var, reuse=None) 206 | features = tf.identity(features, name="features") 207 | 208 | saver = tf.train.Saver(slim.get_variables_to_restore()) 209 | saver.restore(session, args.checkpoint_in) 210 | 211 | output_graph_def = tf.graph_util.convert_variables_to_constants( 212 | session, tf.get_default_graph().as_graph_def(), 213 | [features.name.split(":")[0]]) 214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle: 215 | file_handle.write(output_graph_def.SerializeToString()) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /tools/generate_clip_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import torch 8 | from PIL import Image 9 | 10 | 11 | def _run_in_batches(f, data_dict, out, batch_size): 12 | data_len = len(out) 13 | num_batches = int(data_len / batch_size) 14 | 15 | s, e = 0, 0 16 | for i in range(num_batches): 17 | s, e = i * batch_size, (i + 1) * batch_size 18 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 19 | out[s:e] = f(batch_data_dict) 20 | if e < len(out): 21 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 22 | out[e:] = f(batch_data_dict) 23 | 24 | 25 | def extract_image_patch(image, bbox, patch_shape=None): 26 | """Extract image patch from bounding box. 27 | 28 | Parameters 29 | ---------- 30 | image : ndarray 31 | The full image. 32 | bbox : array_like 33 | The bounding box in format (x, y, width, height). 34 | patch_shape : Optional[array_like] 35 | This parameter can be used to enforce a desired patch shape 36 | (height, width). 
First, the `bbox` is adapted to the aspect ratio 37 | of the patch shape, then it is clipped at the image boundaries. 38 | If None, the shape is computed from :arg:`bbox`. 39 | 40 | Returns 41 | ------- 42 | ndarray | NoneType 43 | An image patch showing the :arg:`bbox`, optionally reshaped to 44 | :arg:`patch_shape`. 45 | Returns None if the bounding box is empty or fully outside of the image 46 | boundaries. 47 | 48 | """ 49 | bbox = np.array(bbox.cpu()) 50 | if patch_shape is not None: 51 | # correct aspect ratio to patch shape 52 | target_aspect = float(patch_shape[1]) / patch_shape[0] 53 | new_width = target_aspect * bbox[3] 54 | bbox[0] -= (new_width - bbox[2]) / 2 55 | bbox[2] = new_width 56 | 57 | # convert to top left, bottom right 58 | bbox[2:] += bbox[:2] 59 | bbox = bbox.astype(np.int) 60 | 61 | # clip at image boundaries 62 | bbox[:2] = np.maximum(0, bbox[:2]) 63 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 64 | if np.any(bbox[:2] >= bbox[2:]): 65 | return None 66 | sx, sy, ex, ey = bbox 67 | image = image[sy:ey, sx:ex] 68 | 69 | #image = cv2.resize(image, tuple(patch_shape[::-1])) 70 | return image 71 | 72 | 73 | class ImageEncoder(object): 74 | 75 | def __init__(self, model, transform, device): 76 | 77 | 78 | self.model = model 79 | self.transform = transform 80 | self.device = device 81 | 82 | def __call__(self, data_x, batch_size=32): 83 | out = [] 84 | #data_x = [i for i in data_x if i is not None] 85 | 86 | #print("[ZSOT ImageEncoder] num_none: {}".format(len(num_none))) 87 | for patch in range(len(data_x)): 88 | if self.device == "cpu": 89 | img = self.transform(Image.fromarray(data_x[patch])) 90 | else: 91 | img = self.transform(Image.fromarray(data_x[patch])).cuda() 92 | out.append(img) 93 | 94 | features = self.model.encode_image(torch.stack(out)).cpu().numpy() 95 | for idx, i in enumerate(features): 96 | if np.isnan(i[0]): 97 | print("nan values") 98 | # features[idx] = np.zeros(512) 99 | # cv2.imshow("image", data_x[idx]) 100 | # cv2.waitKey(0) 101 | 102 | return features 103 | 104 | 105 | def create_box_encoder(model, transform, batch_size=32, device="cpu"): 106 | image_encoder = ImageEncoder(model, transform, device) 107 | 108 | def encoder(image, boxes): 109 | image_patches = [] 110 | for box in boxes: 111 | #print("extracting box {} from image {}".format(box, image.shape)) 112 | patch = extract_image_patch(image, box) 113 | 114 | if patch is None: 115 | print("WARNING: Failed to extract image patch: %s." % str(box)) 116 | patch = np.random.uniform( 117 | 0., 255., image.shape).astype(np.uint8) 118 | image_patches.append(patch) 119 | #image_patches = np.array(image_patches) 120 | return image_encoder(image_patches, batch_size) 121 | 122 | return encoder 123 | 124 | 125 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 126 | """Generate detections with features. 127 | 128 | Parameters 129 | ---------- 130 | encoder : Callable[image, ndarray] -> ndarray 131 | The encoder function takes as input a BGR color image and a matrix of 132 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 133 | corresponding feature vectors. 134 | mot_dir : str 135 | Path to the MOTChallenge directory (can be either train or test). 136 | output_dir 137 | Path to the output directory. Will be created if it does not exist. 138 | detection_dir 139 | Path to custom detections. The directory structure should be the default 140 | MOTChallenge structure: `[sequence]/det/det.txt`. 
If None, uses the 141 | standard MOTChallenge detections. 142 | 143 | """ 144 | if detection_dir is None: 145 | detection_dir = mot_dir 146 | try: 147 | os.makedirs(output_dir) 148 | except OSError as exception: 149 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 150 | pass 151 | else: 152 | raise ValueError( 153 | "Failed to create output directory '%s'" % output_dir) 154 | 155 | for sequence in os.listdir(mot_dir): 156 | print("Processing %s" % sequence) 157 | sequence_dir = os.path.join(mot_dir, sequence) 158 | 159 | image_dir = os.path.join(sequence_dir, "img1") 160 | image_filenames = { 161 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 162 | for f in os.listdir(image_dir)} 163 | 164 | detection_file = os.path.join( 165 | detection_dir, sequence, "det/det.txt") 166 | detections_in = np.loadtxt(detection_file, delimiter=',') 167 | detections_out = [] 168 | 169 | frame_indices = detections_in[:, 0].astype(np.int) 170 | min_frame_idx = frame_indices.astype(np.int).min() 171 | max_frame_idx = frame_indices.astype(np.int).max() 172 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 173 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 174 | mask = frame_indices == frame_idx 175 | rows = detections_in[mask] 176 | 177 | if frame_idx not in image_filenames: 178 | print("WARNING could not find image for frame %d" % frame_idx) 179 | continue 180 | bgr_image = cv2.imread( 181 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 182 | features = encoder(bgr_image, rows[:, 2:6].copy()) 183 | detections_out += [np.r_[(row, feature)] for row, feature 184 | in zip(rows, features)] 185 | 186 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 187 | np.save( 188 | output_filename, np.asarray(detections_out), allow_pickle=False) 189 | 190 | 191 | def parse_args(): 192 | """Parse command line arguments. 193 | """ 194 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 195 | parser.add_argument( 196 | "--model", 197 | default="resources/networks/mars-small128.pb", 198 | help="Path to frozen inference graph protobuf.") 199 | parser.add_argument( 200 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 201 | required=True) 202 | parser.add_argument( 203 | "--detection_dir", help="Path to custom detections. Defaults to " 204 | "standard MOT detections. Directory structure should be the default " 205 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 206 | parser.add_argument( 207 | "--output_dir", help="Output directory. 
Will be created if it does not" 208 | " exist.", default="detections") 209 | return parser.parse_args() 210 | 211 | 212 | def main(): 213 | args = parse_args() 214 | encoder = create_box_encoder(args.model, batch_size=32) 215 | generate_detections(encoder, args.mot_dir, args.output_dir, 216 | args.detection_dir) 217 | 218 | 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /tools/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | 9 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 10 | if len(physical_devices) > 0: 11 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 12 | 13 | def _run_in_batches(f, data_dict, out, batch_size): 14 | data_len = len(out) 15 | num_batches = int(data_len / batch_size) 16 | 17 | s, e = 0, 0 18 | for i in range(num_batches): 19 | s, e = i * batch_size, (i + 1) * batch_size 20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 21 | out[s:e] = f(batch_data_dict) 22 | if e < len(out): 23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 24 | out[e:] = f(batch_data_dict) 25 | 26 | 27 | def extract_image_patch(image, bbox, patch_shape): 28 | """Extract image patch from bounding box. 29 | 30 | Parameters 31 | ---------- 32 | image : ndarray 33 | The full image. 34 | bbox : array_like 35 | The bounding box in format (x, y, width, height). 36 | patch_shape : Optional[array_like] 37 | This parameter can be used to enforce a desired patch shape 38 | (height, width). First, the `bbox` is adapted to the aspect ratio 39 | of the patch shape, then it is clipped at the image boundaries. 40 | If None, the shape is computed from :arg:`bbox`. 41 | 42 | Returns 43 | ------- 44 | ndarray | NoneType 45 | An image patch showing the :arg:`bbox`, optionally reshaped to 46 | :arg:`patch_shape`. 47 | Returns None if the bounding box is empty or fully outside of the image 48 | boundaries. 
49 | 50 | """ 51 | bbox = np.array(bbox) 52 | if patch_shape is not None: 53 | # correct aspect ratio to patch shape 54 | target_aspect = float(patch_shape[1]) / patch_shape[0] 55 | new_width = target_aspect * bbox[3] 56 | bbox[0] -= (new_width - bbox[2]) / 2 57 | bbox[2] = new_width 58 | 59 | # convert to top left, bottom right 60 | bbox[2:] += bbox[:2] 61 | bbox = bbox.astype(np.int) 62 | 63 | # clip at image boundaries 64 | bbox[:2] = np.maximum(0, bbox[:2]) 65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 66 | if np.any(bbox[:2] >= bbox[2:]): 67 | return None 68 | sx, sy, ex, ey = bbox 69 | image = image[sy:ey, sx:ex] 70 | image = cv2.resize(image, tuple(patch_shape[::-1])) 71 | return image 72 | 73 | 74 | class ImageEncoder(object): 75 | 76 | def __init__(self, checkpoint_filename, input_name="images", 77 | output_name="features"): 78 | self.session = tf.Session() 79 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 80 | graph_def = tf.GraphDef() 81 | graph_def.ParseFromString(file_handle.read()) 82 | tf.import_graph_def(graph_def, name="net") 83 | self.input_var = tf.get_default_graph().get_tensor_by_name( 84 | "%s:0" % input_name) 85 | self.output_var = tf.get_default_graph().get_tensor_by_name( 86 | "%s:0" % output_name) 87 | 88 | assert len(self.output_var.get_shape()) == 2 89 | assert len(self.input_var.get_shape()) == 4 90 | self.feature_dim = self.output_var.get_shape().as_list()[-1] 91 | self.image_shape = self.input_var.get_shape().as_list()[1:] 92 | 93 | def __call__(self, data_x, batch_size=32): 94 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 95 | _run_in_batches( 96 | lambda x: self.session.run(self.output_var, feed_dict=x), 97 | {self.input_var: data_x}, out, batch_size) 98 | return out 99 | 100 | 101 | def create_box_encoder(model_filename, input_name="images", 102 | output_name="features", batch_size=32): 103 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 104 | image_shape = image_encoder.image_shape 105 | 106 | def encoder(image, boxes): 107 | image_patches = [] 108 | for box in boxes: 109 | patch = extract_image_patch(image, box, image_shape[:2]) 110 | if patch is None: 111 | print("WARNING: Failed to extract image patch: %s." % str(box)) 112 | patch = np.random.uniform( 113 | 0., 255., image_shape).astype(np.uint8) 114 | image_patches.append(patch) 115 | image_patches = np.asarray(image_patches) 116 | return image_encoder(image_patches, batch_size) 117 | 118 | return encoder 119 | 120 | 121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 122 | """Generate detections with features. 123 | 124 | Parameters 125 | ---------- 126 | encoder : Callable[image, ndarray] -> ndarray 127 | The encoder function takes as input a BGR color image and a matrix of 128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 129 | corresponding feature vectors. 130 | mot_dir : str 131 | Path to the MOTChallenge directory (can be either train or test). 132 | output_dir 133 | Path to the output directory. Will be created if it does not exist. 134 | detection_dir 135 | Path to custom detections. The directory structure should be the default 136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 137 | standard MOTChallenge detections. 
138 | 139 | """ 140 | if detection_dir is None: 141 | detection_dir = mot_dir 142 | try: 143 | os.makedirs(output_dir) 144 | except OSError as exception: 145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 146 | pass 147 | else: 148 | raise ValueError( 149 | "Failed to create output directory '%s'" % output_dir) 150 | 151 | for sequence in os.listdir(mot_dir): 152 | print("Processing %s" % sequence) 153 | sequence_dir = os.path.join(mot_dir, sequence) 154 | 155 | image_dir = os.path.join(sequence_dir, "img1") 156 | image_filenames = { 157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 158 | for f in os.listdir(image_dir)} 159 | 160 | detection_file = os.path.join( 161 | detection_dir, sequence, "det/det.txt") 162 | detections_in = np.loadtxt(detection_file, delimiter=',') 163 | detections_out = [] 164 | 165 | frame_indices = detections_in[:, 0].astype(np.int) 166 | min_frame_idx = frame_indices.astype(np.int).min() 167 | max_frame_idx = frame_indices.astype(np.int).max() 168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 170 | mask = frame_indices == frame_idx 171 | rows = detections_in[mask] 172 | 173 | if frame_idx not in image_filenames: 174 | print("WARNING could not find image for frame %d" % frame_idx) 175 | continue 176 | bgr_image = cv2.imread( 177 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 178 | features = encoder(bgr_image, rows[:, 2:6].copy()) 179 | detections_out += [np.r_[(row, feature)] for row, feature 180 | in zip(rows, features)] 181 | 182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 183 | np.save( 184 | output_filename, np.asarray(detections_out), allow_pickle=False) 185 | 186 | 187 | def parse_args(): 188 | """Parse command line arguments. 189 | """ 190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 191 | parser.add_argument( 192 | "--model", 193 | default="resources/networks/mars-small128.pb", 194 | help="Path to frozen inference graph protobuf.") 195 | parser.add_argument( 196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 197 | required=True) 198 | parser.add_argument( 199 | "--detection_dir", help="Path to custom detections. Defaults to " 200 | "standard MOT detections. Directory structure should be the default " 201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 202 | parser.add_argument( 203 | "--output_dir", help="Output directory. 
Will be created if it does not" 204 | " exist.", default="detections") 205 | return parser.parse_args() 206 | 207 | 208 | def main(): 209 | args = parse_args() 210 | encoder = create_box_encoder(args.model, batch_size=32) 211 | generate_detections(encoder, args.mot_dir, args.output_dir, 212 | args.detection_dir) 213 | 214 | 215 | if __name__ == "__main__": 216 | main() 217 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/__init__.py -------------------------------------------------------------------------------- /utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | 9 | 10 | def check_anchor_order(m): 11 | # Check anchor order against stride order for 
YOLOv5 Detect() module m, and correct if necessary 12 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 13 | da = a[-1] - a[0] # delta a 14 | ds = m.stride[-1] - m.stride[0] # delta s 15 | if da.sign() != ds.sign(): # same order 16 | print('Reversing anchor order') 17 | m.anchors[:] = m.anchors.flip(0) 18 | m.anchor_grid[:] = m.anchor_grid.flip(0) 19 | 20 | 21 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 22 | # Check anchor fit to data, recompute if necessary 23 | print('\nAnalyzing anchors... ', end='') 24 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 25 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 26 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 27 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 28 | 29 | def metric(k): # compute metric 30 | r = wh[:, None] / k[None] 31 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 32 | best = x.max(1)[0] # best_x 33 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 34 | bpr = (best > 1. / thr).float().mean() # best possible recall 35 | return bpr, aat 36 | 37 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 38 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='') 39 | if bpr < 0.98: # threshold to recompute 40 | print('. Attempting to improve anchors, please wait...') 41 | na = m.anchor_grid.numel() // 2 # number of anchors 42 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 43 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 44 | if new_bpr > bpr: # replace anchors 45 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors) 46 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference 47 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 48 | check_anchor_order(m) 49 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.') 50 | else: 51 | print('Original anchors better than new anchors. Proceeding with original anchors.') 52 | print('') # newline 53 | 54 | 55 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 56 | """ Creates kmeans-evolved anchors from training dataset 57 | 58 | Arguments: 59 | path: path to dataset *.yaml, or a loaded dataset 60 | n: number of anchors 61 | img_size: image size used for training 62 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 63 | gen: generations to evolve anchors using genetic algorithm 64 | verbose: print all results 65 | 66 | Return: 67 | k: kmeans evolved anchors 68 | 69 | Usage: 70 | from utils.autoanchor import *; _ = kmean_anchors() 71 | """ 72 | thr = 1. / thr 73 | 74 | def metric(k, wh): # compute metrics 75 | r = wh[:, None] / k[None] 76 | x = torch.min(r, 1. 
/ r).min(2)[0] # ratio metric 77 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 78 | return x, x.max(1)[0] # x, best_x 79 | 80 | def anchor_fitness(k): # mutation fitness 81 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 82 | return (best * (best > thr).float()).mean() # fitness 83 | 84 | def print_results(k): 85 | k = k[np.argsort(k.prod(1))] # sort small to large 86 | x, best = metric(k, wh0) 87 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 88 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat)) 89 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' % 90 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='') 91 | for i, x in enumerate(k): 92 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 93 | return k 94 | 95 | if isinstance(path, str): # *.yaml file 96 | with open(path) as f: 97 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict 98 | from utils.datasets import LoadImagesAndLabels 99 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 100 | else: 101 | dataset = path # dataset 102 | 103 | # Get label wh 104 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 105 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 106 | 107 | # Filter 108 | i = (wh0 < 3.0).any(1).sum() 109 | if i: 110 | print('WARNING: Extremely small objects found. ' 111 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0))) 112 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 113 | 114 | # Kmeans calculation 115 | print('Running kmeans for %g anchors on %g points...' 
% (n, len(wh))) 116 | s = wh.std(0) # sigmas for whitening 117 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 118 | k *= s 119 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 120 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 121 | k = print_results(k) 122 | 123 | # Plot 124 | # k, d = [None] * 20, [None] * 20 125 | # for i in tqdm(range(1, 21)): 126 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 127 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 128 | # ax = ax.ravel() 129 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 130 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 131 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 132 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 133 | # fig.savefig('wh.png', dpi=200) 134 | 135 | # Evolve 136 | npr = np.random 137 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 138 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar 139 | for _ in pbar: 140 | v = np.ones(sh) 141 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 142 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 143 | kg = (k.copy() * v).clip(min=2.0) 144 | fg = anchor_fitness(kg) 145 | if fg > f: 146 | f, k = fg, kg.copy() 147 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f 148 | if verbose: 149 | print_results(k) 150 | 151 | return print_results(k) 152 | -------------------------------------------------------------------------------- /utils/google_app_engine/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/google-appengine/python 2 | 3 | # Create a virtualenv for dependencies. This isolates these packages from 4 | # system-level packages. 5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2. 6 | RUN virtualenv /env -p python3 7 | 8 | # Setting these environment variables are the same as running 9 | # source /env/bin/activate. 10 | ENV VIRTUAL_ENV /env 11 | ENV PATH /env/bin:$PATH 12 | 13 | RUN apt-get update && apt-get install -y python-opencv 14 | 15 | # Copy the application's requirements.txt and run pip to install all 16 | # dependencies into the virtualenv. 17 | ADD requirements.txt /app/requirements.txt 18 | RUN pip install -r /app/requirements.txt 19 | 20 | # Add the application source code. 21 | ADD . /app 22 | 23 | # Run a WSGI server to serve the application. gunicorn must be declared as 24 | # a dependency in requirements.txt. 
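# NOTE (annotation, not part of the upstream Dockerfile): gunicorn is not
# installed by this Dockerfile itself; the application's requirements.txt has
# to pin it, e.g. the gunicorn==19.9.0 line from additional_requirements.txt
# below. $PORT is provided by App Engine at runtime, and "main:app" assumes
# the WSGI application object is named "app" inside main.py.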
25 | CMD gunicorn -b :$PORT main:app 26 | -------------------------------------------------------------------------------- /utils/google_app_engine/additional_requirements.txt: -------------------------------------------------------------------------------- 1 | # add these requirements in your app on top of the existing ones 2 | pip==19.2 3 | Flask==2.3.2 4 | gunicorn==19.9.0 5 | -------------------------------------------------------------------------------- /utils/google_app_engine/app.yaml: -------------------------------------------------------------------------------- 1 | runtime: custom 2 | env: flex 3 | 4 | service: yolov5app 5 | 6 | liveness_check: 7 | initial_delay_sec: 600 8 | 9 | manual_scaling: 10 | instances: 1 11 | resources: 12 | cpu: 1 13 | memory_gb: 4 14 | disk_size_gb: 20 -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import torch 10 | 11 | 12 | def gsutil_getsize(url=''): 13 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 14 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8') 15 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes 16 | 17 | 18 | def attempt_download(weights): 19 | # Attempt to download pretrained weights if not found locally 20 | weights = str(weights).strip().replace("'", '') 21 | file = Path(weights).name.lower() 22 | 23 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/' 24 | models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models 25 | redundant = False # offer second download option 26 | 27 | if file in models and not os.path.isfile(weights): 28 | # Google Drive 29 | # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO', 30 | # 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr', 31 | # 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV', 32 | # 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'} 33 | # r = gdrive_download(id=d[file], name=weights) if file in d else 1 34 | # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check 35 | # return 36 | 37 | try: # GitHub 38 | url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file 39 | print('Downloading %s to %s...' % (url, weights)) 40 | torch.hub.download_url_to_file(url, weights) 41 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check 42 | except Exception as e: # GCP 43 | print('Download error: %s' % e) 44 | assert redundant, 'No secondary mirror' 45 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file 46 | print('Downloading %s to %s...' % (url, weights)) 47 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights) 48 | finally: 49 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check 50 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads 51 | print('ERROR: Download failure: %s' % msg) 52 | print('') 53 | return 54 | 55 | 56 | def gdrive_download(id='1uH2BylpFxHKEGXKL6wJJlsgMU2YEjxuc', name='tmp.zip'): 57 | # Downloads a file from Google Drive. 
from utils.google_utils import *; gdrive_download() 58 | t = time.time() 59 | 60 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='') 61 | os.remove(name) if os.path.exists(name) else None # remove existing 62 | os.remove('cookie') if os.path.exists('cookie') else None 63 | 64 | # Attempt file download 65 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 66 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out)) 67 | if os.path.exists('cookie'): # large file 68 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name) 69 | else: # small file 70 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id) 71 | r = os.system(s) # execute, capture return 72 | os.remove('cookie') if os.path.exists('cookie') else None 73 | 74 | # Error check 75 | if r != 0: 76 | os.remove(name) if os.path.exists(name) else None # remove partial 77 | print('Download error ') # raise Exception('Download error') 78 | return r 79 | 80 | # Unzip if archive 81 | if name.endswith('.zip'): 82 | print('unzipping... ', end='') 83 | os.system('unzip -q %s' % name) # unzip 84 | os.remove(name) # remove zip to free space 85 | 86 | print('Done (%.1fs)' % (time.time() - t)) 87 | return r 88 | 89 | 90 | def get_token(cookie="./cookie"): 91 | with open(cookie) as f: 92 | for line in f: 93 | if "download" in line: 94 | return line.split()[-1] 95 | return "" 96 | 97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 98 | # # Uploads a file to a bucket 99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 100 | # 101 | # storage_client = storage.Client() 102 | # bucket = storage_client.get_bucket(bucket_name) 103 | # blob = bucket.blob(destination_blob_name) 104 | # 105 | # blob.upload_from_filename(source_file_name) 106 | # 107 | # print('File {} uploaded to {}.'.format( 108 | # source_file_name, 109 | # destination_blob_name)) 110 | # 111 | # 112 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 113 | # # Uploads a blob from a bucket 114 | # storage_client = storage.Client() 115 | # bucket = storage_client.get_bucket(bucket_name) 116 | # blob = bucket.blob(source_blob_name) 117 | # 118 | # blob.download_to_filename(destination_file_name) 119 | # 120 | # print('Blob {} downloaded to {}.'.format( 121 | # source_blob_name, 122 | # destination_file_name)) 123 | -------------------------------------------------------------------------------- /utils/loss.py: -------------------------------------------------------------------------------- 1 | # Loss functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from utils.general import bbox_iou 7 | from utils.torch_utils import is_parallel 8 | 9 | 10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 11 | # return positive, negative label smoothing BCE targets 12 | return 1.0 - 0.5 * eps, 0.5 * eps 13 | 14 | 15 | class BCEBlurWithLogitsLoss(nn.Module): 16 | # BCEwithLogitLoss() with reduced missing label effects. 
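# NOTE (annotation, not part of the upstream file): in forward() below,
# dx = sigmoid(pred) - true, and the element-wise BCE loss is scaled by
# 1 - exp((dx - 1) / (alpha + 1e-4)). That factor stays close to 1 for most
# errors but falls toward 0 as dx approaches 1, i.e. when the model is highly
# confident about an object that has no label, so probable missing labels are
# largely ignored instead of being penalized.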
17 | def __init__(self, alpha=0.05): 18 | super(BCEBlurWithLogitsLoss, self).__init__() 19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() 20 | self.alpha = alpha 21 | 22 | def forward(self, pred, true): 23 | loss = self.loss_fcn(pred, true) 24 | pred = torch.sigmoid(pred) # prob from logits 25 | dx = pred - true # reduce only missing label effects 26 | # dx = (pred - true).abs() # reduce missing label and false label effects 27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) 28 | loss *= alpha_factor 29 | return loss.mean() 30 | 31 | 32 | class FocalLoss(nn.Module): 33 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 35 | super(FocalLoss, self).__init__() 36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 37 | self.gamma = gamma 38 | self.alpha = alpha 39 | self.reduction = loss_fcn.reduction 40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 41 | 42 | def forward(self, pred, true): 43 | loss = self.loss_fcn(pred, true) 44 | # p_t = torch.exp(-loss) 45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability 46 | 47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py 48 | pred_prob = torch.sigmoid(pred) # prob from logits 49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob) 50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 51 | modulating_factor = (1.0 - p_t) ** self.gamma 52 | loss *= alpha_factor * modulating_factor 53 | 54 | if self.reduction == 'mean': 55 | return loss.mean() 56 | elif self.reduction == 'sum': 57 | return loss.sum() 58 | else: # 'none' 59 | return loss 60 | 61 | 62 | class QFocalLoss(nn.Module): 63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) 64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): 65 | super(QFocalLoss, self).__init__() 66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() 67 | self.gamma = gamma 68 | self.alpha = alpha 69 | self.reduction = loss_fcn.reduction 70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element 71 | 72 | def forward(self, pred, true): 73 | loss = self.loss_fcn(pred, true) 74 | 75 | pred_prob = torch.sigmoid(pred) # prob from logits 76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) 77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma 78 | loss *= alpha_factor * modulating_factor 79 | 80 | if self.reduction == 'mean': 81 | return loss.mean() 82 | elif self.reduction == 'sum': 83 | return loss.sum() 84 | else: # 'none' 85 | return loss 86 | 87 | 88 | def compute_loss(p, targets, model): # predictions, targets, model 89 | device = targets.device 90 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device) 91 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets 92 | h = model.hyp # hyperparameters 93 | 94 | # Define criteria 95 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights) 96 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device)) 97 | 98 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3 99 | cp, cn = smooth_BCE(eps=0.0) 100 | 101 | # Focal loss 102 | g = h['fl_gamma'] # focal loss gamma 103 | if g > 0: 104 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g) 105 | 106 | # Losses 107 | nt = 0 # number of targets 108 | no = len(p) # number of outputs 109 | balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6 110 | for i, pi in enumerate(p): # layer index, layer predictions 111 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx 112 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj 113 | 114 | n = b.shape[0] # number of targets 115 | if n: 116 | nt += n # cumulative targets 117 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets 118 | 119 | # Regression 120 | pxy = ps[:, :2].sigmoid() * 2. - 0.5 121 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] 122 | pbox = torch.cat((pxy, pwh), 1) # predicted box 123 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target) 124 | lbox += (1.0 - iou).mean() # iou loss 125 | 126 | # Objectness 127 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio 128 | 129 | # Classification 130 | if model.nc > 1: # cls loss (only if multiple classes) 131 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets 132 | t[range(n), tcls[i]] = cp 133 | lcls += BCEcls(ps[:, 5:], t) # BCE 134 | 135 | # Append targets to text file 136 | # with open('targets.txt', 'a') as file: 137 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)] 138 | 139 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss 140 | 141 | s = 3 / no # output count scaling 142 | lbox *= h['box'] * s 143 | lobj *= h['obj'] * s * (1.4 if no == 4 else 1.) 
144 | lcls *= h['cls'] * s 145 | bs = tobj.shape[0] # batch size 146 | 147 | loss = lbox + lobj + lcls 148 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach() 149 | 150 | 151 | def build_targets(p, targets, model): 152 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h) 153 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module 154 | na, nt = det.na, targets.shape[0] # number of anchors, targets 155 | tcls, tbox, indices, anch = [], [], [], [] 156 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain 157 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt) 158 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices 159 | 160 | g = 0.5 # bias 161 | off = torch.tensor([[0, 0], 162 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m 163 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm 164 | ], device=targets.device).float() * g # offsets 165 | 166 | for i in range(det.nl): 167 | anchors = det.anchors[i] 168 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain 169 | 170 | # Match targets to anchors 171 | t = targets * gain 172 | if nt: 173 | # Matches 174 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio 175 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare 176 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2)) 177 | t = t[j] # filter 178 | 179 | # Offsets 180 | gxy = t[:, 2:4] # grid xy 181 | gxi = gain[[2, 3]] - gxy # inverse 182 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T 183 | l, m = ((gxi % 1. < g) & (gxi > 1.)).T 184 | j = torch.stack((torch.ones_like(j), j, k, l, m)) 185 | t = t.repeat((5, 1, 1))[j] 186 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] 187 | else: 188 | t = targets[0] 189 | offsets = 0 190 | 191 | # Define 192 | b, c = t[:, :2].long().T # image, class 193 | gxy = t[:, 2:4] # grid xy 194 | gwh = t[:, 4:6] # grid wh 195 | gij = (gxy - offsets).long() 196 | gi, gj = gij.T # grid xy indices 197 | 198 | # Append 199 | a = t[:, 6].long() # anchor indices 200 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices 201 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box 202 | anch.append(anchors[a]) # anchors 203 | tcls.append(c) # class 204 | 205 | return tcls, tbox, indices, anch 206 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]): 19 | """ Compute the average precision, given the recall and precision curves. 20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 21 | # Arguments 22 | tp: True positives (nparray, nx1 or nx10). 23 | conf: Objectness value from 0-1 (nparray). 24 | pred_cls: Predicted object classes (nparray). 25 | target_cls: True object classes (nparray). 
26 | plot: Plot precision-recall curve at mAP@0.5 27 | save_dir: Plot save directory 28 | # Returns 29 | The average precision as computed in py-faster-rcnn. 30 | """ 31 | 32 | # Sort by objectness 33 | i = np.argsort(-conf) 34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 35 | 36 | # Find unique classes 37 | unique_classes = np.unique(target_cls) 38 | 39 | # Create Precision-Recall curve and compute AP for each class 40 | px, py = np.linspace(0, 1, 1000), [] # for plotting 41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 44 | for ci, c in enumerate(unique_classes): 45 | i = pred_cls == c 46 | n_l = (target_cls == c).sum() # number of labels 47 | n_p = i.sum() # number of predictions 48 | 49 | if n_p == 0 or n_l == 0: 50 | continue 51 | else: 52 | # Accumulate FPs and TPs 53 | fpc = (1 - tp[i]).cumsum(0) 54 | tpc = tp[i].cumsum(0) 55 | 56 | # Recall 57 | recall = tpc / (n_l + 1e-16) # recall curve 58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases 59 | 60 | # Precision 61 | precision = tpc / (tpc + fpc) # precision curve 62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 63 | 64 | # AP from recall-precision curve 65 | for j in range(tp.shape[1]): 66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 67 | if plot and (j == 0): 68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 69 | 70 | # Compute F1 score (harmonic mean of precision and recall) 71 | f1 = 2 * p * r / (p + r + 1e-16) 72 | 73 | if plot: 74 | plot_pr_curve(px, py, ap, save_dir, names) 75 | 76 | return p, r, ap, f1, unique_classes.astype('int32') 77 | 78 | 79 | def compute_ap(recall, precision): 80 | """ Compute the average precision, given the recall and precision curves 81 | # Arguments 82 | recall: The recall curve (list) 83 | precision: The precision curve (list) 84 | # Returns 85 | Average precision, precision curve, recall curve 86 | """ 87 | 88 | # Append sentinel values to beginning and end 89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01])) 90 | mpre = np.concatenate(([1.], precision, [0.])) 91 | 92 | # Compute the precision envelope 93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 94 | 95 | # Integrate area under curve 96 | method = 'interp' # methods: 'continuous', 'interp' 97 | if method == 'interp': 98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 100 | else: # 'continuous' 101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes 102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 103 | 104 | return ap, mpre, mrec 105 | 106 | 107 | class ConfusionMatrix: 108 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 109 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 110 | self.matrix = np.zeros((nc + 1, nc + 1)) 111 | self.nc = nc # number of classes 112 | self.conf = conf 113 | self.iou_thres = iou_thres 114 | 115 | def process_batch(self, detections, labels): 116 | """ 117 | Return intersection-over-union (Jaccard index) of boxes. 118 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
119 | Arguments: 120 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 121 | labels (Array[M, 5]), class, x1, y1, x2, y2 122 | Returns: 123 | None, updates confusion matrix accordingly 124 | """ 125 | detections = detections[detections[:, 4] > self.conf] 126 | gt_classes = labels[:, 0].int() 127 | detection_classes = detections[:, 5].int() 128 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 129 | 130 | x = torch.where(iou > self.iou_thres) 131 | if x[0].shape[0]: 132 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() 133 | if x[0].shape[0] > 1: 134 | matches = matches[matches[:, 2].argsort()[::-1]] 135 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 136 | matches = matches[matches[:, 2].argsort()[::-1]] 137 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 138 | else: 139 | matches = np.zeros((0, 3)) 140 | 141 | n = matches.shape[0] > 0 142 | m0, m1, _ = matches.transpose().astype(np.int16) 143 | for i, gc in enumerate(gt_classes): 144 | j = m0 == i 145 | if n and sum(j) == 1: 146 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 147 | else: 148 | self.matrix[gc, self.nc] += 1 # background FP 149 | 150 | if n: 151 | for i, dc in enumerate(detection_classes): 152 | if not any(m1 == i): 153 | self.matrix[self.nc, dc] += 1 # background FN 154 | 155 | def matrix(self): 156 | return self.matrix 157 | 158 | def plot(self, save_dir='', names=()): 159 | try: 160 | import seaborn as sn 161 | 162 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize 163 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 164 | 165 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 166 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 167 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels 168 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True, 169 | xticklabels=names + ['background FN'] if labels else "auto", 170 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1)) 171 | fig.axes[0].set_xlabel('True') 172 | fig.axes[0].set_ylabel('Predicted') 173 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250) 174 | except Exception as e: 175 | pass 176 | 177 | def print(self): 178 | for i in range(self.nc + 1): 179 | print(' '.join(map(str, self.matrix[i]))) 180 | 181 | 182 | # Plots ---------------------------------------------------------------------------------------------------------------- 183 | 184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()): 185 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 186 | py = np.stack(py, axis=1) 187 | 188 | if 0 < len(names) < 21: # show mAP in legend if < 10 classes 189 | for i, y in enumerate(py.T): 190 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision) 191 | else: 192 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision) 193 | 194 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean()) 195 | ax.set_xlabel('Recall') 196 | ax.set_ylabel('Precision') 197 | ax.set_xlim(0, 1) 198 | ax.set_ylim(0, 1) 199 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 200 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250) 201 | -------------------------------------------------------------------------------- /utils/models/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/models/__init__.py -------------------------------------------------------------------------------- /utils/models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from models.common import Conv, DWConv 8 | from utils.google_utils import attempt_download 9 | 10 | 11 | class CrossConv(nn.Module): 12 | # Cross Convolution Downsample 13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 15 | super(CrossConv, self).__init__() 16 | c_ = int(c2 * e) # hidden channels 17 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 19 | self.add = shortcut and c1 == c2 20 | 21 | def forward(self, x): 22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 23 | 24 | 25 | class Sum(nn.Module): 26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 27 | def __init__(self, n, weight=False): # n: number of inputs 28 | super(Sum, self).__init__() 29 | self.weight = weight # apply weights boolean 30 | self.iter = range(n - 1) # iter object 31 | if weight: 32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights 33 | 34 | def forward(self, x): 35 | y = x[0] # no weight 36 | if self.weight: 37 | w = torch.sigmoid(self.w) * 2 38 | for i in self.iter: 39 | y = y + x[i + 1] * w[i] 40 | else: 41 | for i in self.iter: 42 | y = y + x[i + 1] 43 | return y 44 | 45 | 46 | class GhostConv(nn.Module): 47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups 49 | super(GhostConv, self).__init__() 50 | c_ = c2 // 2 # hidden channels 51 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 53 | 54 | def forward(self, x): 55 | y = self.cv1(x) 56 | return torch.cat([y, self.cv2(y)], 1) 57 | 58 | 59 | class GhostBottleneck(nn.Module): 60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 61 | def __init__(self, c1, c2, k, s): 62 | super(GhostBottleneck, self).__init__() 63 | c_ = c2 // 2 64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw 65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear 67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), 68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity() 69 | 70 | def forward(self, x): 71 | return self.conv(x) + self.shortcut(x) 72 | 73 | 74 | class MixConv2d(nn.Module): 75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 77 | super(MixConv2d, self).__init__() 78 | groups = len(k) 79 | if equal_ch: # equal c_ per group 80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices 81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 82 | else: # equal weight.numel() per group 83 | b = [c2] + [0] * groups 84 | a = np.eye(groups + 1, groups, k=-1) 85 | a -= np.roll(a, 1, axis=1) 86 | a *= np.array(k) ** 2 87 | a[0] = 1 88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for 
equal weight indices, ax = b 89 | 90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)]) 91 | self.bn = nn.BatchNorm2d(c2) 92 | self.act = nn.LeakyReLU(0.1, inplace=True) 93 | 94 | def forward(self, x): 95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 96 | 97 | 98 | class Ensemble(nn.ModuleList): 99 | # Ensemble of models 100 | def __init__(self): 101 | super(Ensemble, self).__init__() 102 | 103 | def forward(self, x, augment=False): 104 | y = [] 105 | for module in self: 106 | y.append(module(x, augment)[0]) 107 | # y = torch.stack(y).max(0)[0] # max ensemble 108 | # y = torch.cat(y, 1) # nms ensemble 109 | y = torch.stack(y).mean(0) # mean ensemble 110 | return y, None # inference, train output 111 | 112 | 113 | def attempt_load(weights, map_location=None): 114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 115 | model = Ensemble() 116 | for w in weights if isinstance(weights, list) else [weights]: 117 | attempt_download(w) 118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model 119 | 120 | # Compatibility updates 121 | for m in model.modules(): 122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 123 | m.inplace = True # pytorch 1.7.0 compatibility 124 | elif type(m) is Conv: 125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 126 | 127 | if len(model) == 1: 128 | return model[-1] # return model 129 | else: 130 | print('Ensemble created with %s\n' % weights) 131 | for k in ['names', 'stride']: 132 | setattr(model, k, getattr(model[-1], k)) 133 | return model # return ensemble 134 | -------------------------------------------------------------------------------- /utils/models/export.py: -------------------------------------------------------------------------------- 1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats 2 | 3 | Usage: 4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 5 | """ 6 | 7 | import argparse 8 | import sys 9 | import time 10 | 11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories 12 | 13 | import torch 14 | import torch.nn as nn 15 | 16 | import models 17 | from models.experimental import attempt_load 18 | from utils.activations import Hardswish, SiLU 19 | from utils.general import set_logging, check_img_size 20 | 21 | if __name__ == '__main__': 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/ 24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width 25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size') 26 | opt = parser.parse_args() 27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand 28 | print(opt) 29 | set_logging() 30 | t = time.time() 31 | 32 | # Load PyTorch model 33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model 34 | labels = model.names 35 | 36 | # Checks 37 | gs = int(max(model.stride)) # grid size (max stride) 38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples 39 | 40 | # Input 41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection 42 | 43 | # Update model 44 | for k, m in model.named_modules(): 45 | 
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 46 | if isinstance(m, models.common.Conv): # assign export-friendly activations 47 | if isinstance(m.act, nn.Hardswish): 48 | m.act = Hardswish() 49 | elif isinstance(m.act, nn.SiLU): 50 | m.act = SiLU() 51 | # elif isinstance(m, models.yolo.Detect): 52 | # m.forward = m.forward_export # assign forward (optional) 53 | model.model[-1].export = True # set Detect() layer export=True 54 | y = model(img) # dry run 55 | 56 | # TorchScript export 57 | try: 58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__) 59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename 60 | ts = torch.jit.trace(model, img) 61 | ts.save(f) 62 | print('TorchScript export success, saved as %s' % f) 63 | except Exception as e: 64 | print('TorchScript export failure: %s' % e) 65 | 66 | # ONNX export 67 | try: 68 | import onnx 69 | 70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__) 71 | f = opt.weights.replace('.pt', '.onnx') # filename 72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], 73 | output_names=['classes', 'boxes'] if y is None else ['output']) 74 | 75 | # Checks 76 | onnx_model = onnx.load(f) # load onnx model 77 | onnx.checker.check_model(onnx_model) # check onnx model 78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model 79 | print('ONNX export success, saved as %s' % f) 80 | except Exception as e: 81 | print('ONNX export failure: %s' % e) 82 | 83 | # CoreML export 84 | try: 85 | import coremltools as ct 86 | 87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__) 88 | # convert model from torchscript and apply pixel scaling as per detect.py 89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])]) 90 | f = opt.weights.replace('.pt', '.mlmodel') # filename 91 | model.save(f) 92 | print('CoreML export success, saved as %s' % f) 93 | except Exception as e: 94 | print('CoreML export failure: %s' % e) 95 | 96 | # Finish 97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' 
% (time.time() - t)) 98 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3-spp.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3-SPP head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, SPP, [512, [5, 9, 13]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3-tiny.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,14, 23,27, 37,58] # P4/16 9 | - [81,82, 135,169, 344,319] # P5/32 10 | 11 | # YOLOv3-tiny backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [[-1, 1, Conv, [16, 3, 1]], # 0 15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2 16 | [-1, 1, Conv, [32, 3, 1]], 17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4 18 | [-1, 1, Conv, [64, 3, 1]], 19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8 20 | [-1, 1, Conv, [128, 3, 1]], 21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16 22 | [-1, 1, Conv, [256, 3, 1]], 23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32 24 | [-1, 1, Conv, [512, 3, 1]], 25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11 26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12 27 | ] 28 | 29 | # YOLOv3-tiny head 30 | head: 31 | [[-1, 1, Conv, [1024, 3, 1]], 32 | [-1, 1, Conv, [256, 1, 1]], 33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large) 34 | 35 | [-2, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium) 39 | 40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5) 41 | ] 42 | -------------------------------------------------------------------------------- /utils/models/hub/yolov3.yaml: 
-------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # darknet53 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Conv, [32, 3, 1]], # 0 16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2 17 | [-1, 1, Bottleneck, [64]], 18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4 19 | [-1, 2, Bottleneck, [128]], 20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8 21 | [-1, 8, Bottleneck, [256]], 22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16 23 | [-1, 8, Bottleneck, [512]], 24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32 25 | [-1, 4, Bottleneck, [1024]], # 10 26 | ] 27 | 28 | # YOLOv3 head 29 | head: 30 | [[-1, 1, Bottleneck, [1024, False]], 31 | [-1, 1, Conv, [512, [1, 1]]], 32 | [-1, 1, Conv, [1024, 3, 1]], 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large) 35 | 36 | [-2, 1, Conv, [256, 1, 1]], 37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4 39 | [-1, 1, Bottleneck, [512, False]], 40 | [-1, 1, Bottleneck, [512, False]], 41 | [-1, 1, Conv, [256, 1, 1]], 42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium) 43 | 44 | [-2, 1, Conv, [128, 1, 1]], 45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3 47 | [-1, 1, Bottleneck, [256, False]], 48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small) 49 | 50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 51 | ] 52 | -------------------------------------------------------------------------------- /utils/models/hub/yolov5-fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, Bottleneck, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 6, BottleneckCSP, [1024]], # 9 25 | ] 26 | 27 | # YOLOv5 FPN head 28 | head: 29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large) 30 | 31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 33 | [-1, 1, Conv, [512, 1, 1]], 34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium) 35 | 36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 38 | [-1, 1, Conv, [256, 1, 1]], 39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small) 40 | 41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 42 | ] 43 | -------------------------------------------------------------------------------- /utils/models/hub/yolov5-panet.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 
1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 PANet head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.67 # model 
depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/models/yolov5x.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 1.33 # model depth multiple 4 | 
width_multiple: 1.25 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [10,13, 16,30, 33,23] # P3/8 9 | - [30,61, 62,45, 59,119] # P4/16 10 | - [116,90, 156,198, 373,326] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 17 | [-1, 3, BottleneckCSP, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 19 | [-1, 9, BottleneckCSP, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 21 | [-1, 9, BottleneckCSP, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 23 | [-1, 1, SPP, [1024, [5, 9, 13]]], 24 | [-1, 3, BottleneckCSP, [1024, False]], # 9 25 | ] 26 | 27 | # YOLOv5 head 28 | head: 29 | [[-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, BottleneckCSP, [512, False]], # 13 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']], 36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 14], 1, Concat, [1]], # cat head P4 41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 10], 1, Concat, [1]], # cat head P5 45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large) 46 | 47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /utils/roboflow.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import base64 3 | import io 4 | import cv2 5 | from PIL import Image 6 | import numpy as np 7 | 8 | 9 | def predict_image(image, api_key, url, confidence, overlap, idx): 10 | retval, buffer = cv2.imencode('.jpg', image) 11 | img_str = base64.b64encode(buffer) 12 | img_str = img_str.decode("ascii") 13 | 14 | # Construct the URL 15 | upload_url = "".join([ 16 | url, 17 | "?api_key=", 18 | api_key, 19 | "&confidence=", 20 | str(confidence), 21 | "&overlap=", 22 | str(overlap), 23 | "&name=", 24 | str(idx), 25 | ".jpg" 26 | ]) 27 | 28 | # POST to the API 29 | r = requests.post(upload_url, data=img_str, headers={ 30 | "Content-Type": "application/x-www-form-urlencoded" 31 | }) 32 | 33 | json = r.json() 34 | 35 | predictions = json["predictions"] 36 | formatted_predictions = [] 37 | classes = [] 38 | 39 | for pred in predictions: 40 | formatted_pred = [pred["x"], pred["y"], pred["width"], pred["height"], pred["confidence"]] 41 | 42 | # convert to top-left x/y from center 43 | formatted_pred[0] -= formatted_pred[2]/2 44 | formatted_pred[1] -= formatted_pred[3]/2 45 | 46 | formatted_predictions.append(formatted_pred) 47 | classes.append(pred["class"]) 48 | 49 | #print(formatted_predictions) 50 | 51 | return formatted_predictions, classes 52 | -------------------------------------------------------------------------------- /utils/yolov4.py: -------------------------------------------------------------------------------- 1 | from tool.utils import * 2 | from tool.torch_utils import * 3 | from tool.darknet2pytorch import Darknet 4 | from utils.general import non_max_suppression, xyxy2xywh 5 | import cv2 6 | import torch 7 | 8 | class Yolov4Engine: 9 | def __init__(self, weights, cfgfile, device, names, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 10 | self.model = Darknet(cfgfile) 11 | self.model.load_weights(weights[0]) 12 | 
self.device = device 13 | 14 | if self.device != "cpu": 15 | self.model.cuda() 16 | 17 | self.classes = classes 18 | self.names = load_class_names(names) 19 | self.conf_thres = conf_thres 20 | self.iou_thres = iou_thres 21 | self.augment = augment 22 | self.agnostic_nms = agnostic_nms 23 | 24 | def infer(self, img): 25 | img_resized = cv2.resize(img, (self.model.width, self.model.height)) 26 | pred = do_detect(self.model, img_resized, self.conf_thres, self.iou_thres, self.device != "cpu")[0] 27 | return np.array(pred) 28 | 29 | def postprocess(self, pred, img_shape): 30 | height = img_shape[0] 31 | width = img_shape[1] 32 | classes = pred[:, 6].tolist() 33 | for i, cls in enumerate(classes): 34 | classes[i] = self.names[int(cls)] 35 | 36 | dets = pred[:, :5] 37 | for i, det in enumerate(dets): 38 | box = det 39 | x1 = int(box[0] * width) 40 | y1 = int(box[1] * height) 41 | x2 = int(box[2] * width) 42 | y2 = int(box[3] * height) 43 | newDet = [x1, y1, x2 - x1, y2 - y1, det[4]] # top-left x, top-left y, width, height, confidence 44 | dets[i] = newDet 45 | return pred, classes 46 | 47 | 48 | def nms(self, pred): 49 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 50 | return out 51 | -------------------------------------------------------------------------------- /utils/yolov5.py: -------------------------------------------------------------------------------- 1 | from models.experimental import attempt_load 2 | from utils.general import non_max_suppression 3 | 4 | class Yolov5Engine: 5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 6 | self.model = attempt_load(weights, map_location=device) 7 | if half: 8 | self.model.half() 9 | self.classes = classes 10 | self.conf_thres = conf_thres 11 | self.iou_thres = iou_thres 12 | self.augment = augment 13 | self.agnostic_nms = agnostic_nms 14 | 15 | def infer(self, img): 16 | pred = self.model(img, augment=self.augment)[0] 17 | pred = self.nms(pred) 18 | return pred 19 | 20 | def nms(self, pred): 21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 22 | return out 23 | 24 | def get_names(self): 25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names -------------------------------------------------------------------------------- /utils/yolov7.py: -------------------------------------------------------------------------------- 1 | from models.experimental import attempt_load 2 | from utils.general import non_max_suppression 3 | 4 | class Yolov7Engine: 5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half): 6 | self.model = attempt_load(weights, map_location=device) 7 | if half: 8 | self.model.half() 9 | self.classes = classes 10 | self.conf_thres = conf_thres 11 | self.iou_thres = iou_thres 12 | self.augment = augment 13 | self.agnostic_nms = agnostic_nms 14 | 15 | def infer(self, img): 16 | pred = self.model(img, augment=self.augment)[0] 17 | pred = self.nms(pred) 18 | return pred 19 | 20 | def nms(self, pred): 21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms) 22 | return out 23 | 24 | def get_names(self): 25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names --------------------------------------------------------------------------------
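A minimal usage sketch for the two inference paths defined above. Yolov5Engine (Yolov7Engine is identical apart from its name) wraps local *.pt weights, while predict_image in utils/roboflow.py calls a hosted Roboflow endpoint. Everything concrete below is an illustrative assumption rather than something taken from this repository: the weights file 'yolov5s.pt', the input file 'frame.jpg', the 640x640 size, the threshold values, the placeholder API key and endpoint URL, and the plain cv2.resize that stands in for whatever letterbox-style preprocessing the full tracking pipeline performs.

import cv2
import torch

from utils.roboflow import predict_image
from utils.yolov5 import Yolov5Engine

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
half = device.type != 'cpu'  # FP16 is only useful on GPU

# Local-weights path: load the model once, then run preprocessed frames through it.
engine = Yolov5Engine('yolov5s.pt', device, None, 0.4, 0.45, False, False, half)
names = engine.get_names()

frame = cv2.imread('frame.jpg')                   # BGR, HWC uint8 (hypothetical input)
img = cv2.resize(frame, (640, 640))               # simplified stand-in for letterbox resizing
img = img[:, :, ::-1].transpose(2, 0, 1).copy()   # BGR -> RGB, HWC -> CHW
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float()
img /= 255.0                                      # 0-255 -> 0.0-1.0
img = img.unsqueeze(0)                            # add batch dimension

dets = engine.infer(img)[0]                       # NMS output for the single image in the batch
if dets is not None:                              # some NMS versions return None when nothing is found
    for *xyxy, conf, cls in dets.tolist():        # each row: x1, y1, x2, y2, confidence, class index
        print(names[int(cls)], round(conf, 2), [int(v) for v in xyxy])

# Hosted path: the same frame can instead be sent to a Roboflow model endpoint.
# The API key, endpoint URL, and 0-100 threshold convention are placeholders/assumptions.
boxes, labels = predict_image(
    frame,
    api_key='YOUR_API_KEY',
    url='https://detect.roboflow.com/your-model/1',
    confidence=40,
    overlap=30,
    idx=0)                                        # frame index, only used to name the upload

Note that the two paths return different box conventions: the engine path yields x1/y1/x2/y2 boxes in the coordinates of the resized input after NMS, while predict_image converts the hosted response to top-left x/y plus width/height before returning it.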