├── .gitignore
├── LICENSE
├── README.md
├── clip_object_tracker.py
├── coco.names
├── cv.yml
├── data
│   ├── coco.yaml
│   ├── coco128.yaml
│   ├── hyp.finetune.yaml
│   ├── hyp.scratch.yaml
│   ├── images
│   │   ├── bus.jpg
│   │   └── zidane.jpg
│   ├── scripts
│   │   ├── get_coco.sh
│   │   └── get_voc.sh
│   ├── video
│   │   ├── cars.mp4
│   │   ├── fish.mp4
│   │   └── test.mp4
│   └── voc.yaml
├── deep_sort
│   ├── __init__.py
│   ├── detection.py
│   ├── iou_matching.py
│   ├── kalman_filter.py
│   ├── linear_assignment.py
│   ├── nn_matching.py
│   ├── preprocessing.py
│   ├── track.py
│   └── tracker.py
├── example
│   └── video
│       └── fish.mp4
├── model_data
│   └── mars-small128.pb
├── models
│   ├── __init__.py
│   ├── common.py
│   ├── experimental.py
│   ├── export.py
│   ├── hub
│   │   ├── yolov3-spp.yaml
│   │   ├── yolov3-tiny.yaml
│   │   ├── yolov3.yaml
│   │   ├── yolov5-fpn.yaml
│   │   └── yolov5-panet.yaml
│   ├── yolo.py
│   ├── yolov5l.yaml
│   ├── yolov5m.yaml
│   ├── yolov5s.yaml
│   └── yolov5x.yaml
├── requirements.txt
├── tool
│   ├── config.py
│   ├── darknet2pytorch.py
│   ├── region_loss.py
│   ├── torch_utils.py
│   ├── utils.py
│   ├── utils_iou.py
│   └── yolo_layer.py
├── tools
│   ├── freeze_model.py
│   ├── generate_clip_detections.py
│   └── generate_detections.py
└── utils
    ├── __init__.py
    ├── activations.py
    ├── autoanchor.py
    ├── datasets.py
    ├── general.py
    ├── google_app_engine
    │   ├── Dockerfile
    │   ├── additional_requirements.txt
    │   └── app.yaml
    ├── google_utils.py
    ├── loss.py
    ├── metrics.py
    ├── models
    │   ├── __init__.py
    │   ├── common.py
    │   ├── experimental.py
    │   ├── export.py
    │   ├── hub
    │   │   ├── yolov3-spp.yaml
    │   │   ├── yolov3-tiny.yaml
    │   │   ├── yolov3.yaml
    │   │   ├── yolov5-fpn.yaml
    │   │   └── yolov5-panet.yaml
    │   ├── yolo.py
    │   ├── yolov5l.yaml
    │   ├── yolov5m.yaml
    │   ├── yolov5s.yaml
    │   └── yolov5x.yaml
    ├── plots.py
    ├── roboflow.py
    ├── torch_utils.py
    ├── yolov4.py
    ├── yolov5.py
    └── yolov7.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Repo-specific GitIgnore ----------------------------------------------------------------------------------------------
2 | *.cfg
3 | !cfg/yolov3*.cfg
4 |
5 | storage.googleapis.com
6 | runs/*
7 | !data/images/zidane.jpg
8 | !data/images/bus.jpg
9 | !data/coco.names
10 | !data/coco_paper.names
11 | !data/coco.data
12 | !data/coco_*.data
13 | !data/coco_*.txt
14 | !data/trainvalno5k.shapes
15 | !data/*.sh
16 |
17 | pycocotools/*
18 | results*.txt
19 | gcp_test*.sh
20 |
21 | # Datasets -------------------------------------------------------------------------------------------------------------
22 | coco/
23 | coco128/
24 | VOC/
25 |
26 | # MATLAB GitIgnore -----------------------------------------------------------------------------------------------------
27 | *.m~
28 | *.mat
29 | !targets*.mat
30 |
31 | # Neural Network weights -----------------------------------------------------------------------------------------------
32 | *.weights
33 | *.pt
34 | *.onnx
35 | *.mlmodel
36 | *.torchscript
37 | darknet53.conv.74
38 | yolov3-tiny.conv.15
39 |
40 | # GitHub Python GitIgnore ----------------------------------------------------------------------------------------------
41 | # Byte-compiled / optimized / DLL files
42 | __pycache__/
43 | *.py[cod]
44 | *$py.class
45 |
46 | # C extensions
47 | *.so
48 |
49 | # Distribution / packaging
50 | .Python
51 | env/
52 | build/
53 | develop-eggs/
54 | dist/
55 | downloads/
56 | eggs/
57 | .eggs/
58 | lib/
59 | lib64/
60 | parts/
61 | sdist/
62 | var/
63 | wheels/
64 | *.egg-info/
65 | wandb/
66 | .installed.cfg
67 | *.egg
68 |
69 |
70 | # PyInstaller
71 | # Usually these files are written by a python script from a template
72 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
73 | *.manifest
74 | *.spec
75 |
76 | # Installer logs
77 | pip-log.txt
78 | pip-delete-this-directory.txt
79 |
80 | # Unit test / coverage reports
81 | htmlcov/
82 | .tox/
83 | .coverage
84 | .coverage.*
85 | .cache
86 | nosetests.xml
87 | coverage.xml
88 | *.cover
89 | .hypothesis/
90 |
91 | # Translations
92 | *.mo
93 | *.pot
94 |
95 | # Django stuff:
96 | *.log
97 | local_settings.py
98 |
99 | # Flask stuff:
100 | instance/
101 | .webassets-cache
102 |
103 | # Scrapy stuff:
104 | .scrapy
105 |
106 | # Sphinx documentation
107 | docs/_build/
108 |
109 | # PyBuilder
110 | target/
111 |
112 | # Jupyter Notebook
113 | .ipynb_checkpoints
114 |
115 | # pyenv
116 | .python-version
117 |
118 | # celery beat schedule file
119 | celerybeat-schedule
120 |
121 | # SageMath parsed files
122 | *.sage.py
123 |
124 | # dotenv
125 | .env
126 |
127 | # virtualenv
128 | .venv*
129 | venv*/
130 | ENV*/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 |
145 |
146 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore -----------------------------------------------
147 |
148 | # General
149 | .DS_Store
150 | .AppleDouble
151 | .LSOverride
152 |
153 | # Icon must end with two \r
154 | Icon
155 | Icon?
156 |
157 | # Thumbnails
158 | ._*
159 |
160 | # Files that might appear in the root of a volume
161 | .DocumentRevisions-V100
162 | .fseventsd
163 | .Spotlight-V100
164 | .TemporaryItems
165 | .Trashes
166 | .VolumeIcon.icns
167 | .com.apple.timemachine.donotpresent
168 |
169 | # Directories potentially created on remote AFP share
170 | .AppleDB
171 | .AppleDesktop
172 | Network Trash Folder
173 | Temporary Items
174 | .apdisk
175 |
176 |
177 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore
178 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
179 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
180 |
181 | # User-specific stuff:
182 | .idea/*
183 | .idea/**/workspace.xml
184 | .idea/**/tasks.xml
185 | .idea/dictionaries
186 | .html # Bokeh Plots
187 | .pg # TensorFlow Frozen Graphs
188 | .avi # videos
189 |
190 | # Sensitive or high-churn files:
191 | .idea/**/dataSources/
192 | .idea/**/dataSources.ids
193 | .idea/**/dataSources.local.xml
194 | .idea/**/sqlDataSources.xml
195 | .idea/**/dynamic.xml
196 | .idea/**/uiDesigner.xml
197 |
198 | # Gradle:
199 | .idea/**/gradle.xml
200 | .idea/**/libraries
201 |
202 | # CMake
203 | cmake-build-debug/
204 | cmake-build-release/
205 |
206 | # Mongo Explorer plugin:
207 | .idea/**/mongoSettings.xml
208 |
209 | ## File-based project format:
210 | *.iws
211 |
212 | ## Plugin-specific files:
213 |
214 | # IntelliJ
215 | out/
216 |
217 | # mpeltonen/sbt-idea plugin
218 | .idea_modules/
219 |
220 | # JIRA plugin
221 | atlassian-ide-plugin.xml
222 |
223 | # Cursive Clojure plugin
224 | .idea/replstate.xml
225 |
226 | # Crashlytics plugin (for Android Studio and IntelliJ)
227 | com_crashlytics_export_strings.xml
228 | crashlytics.properties
229 | crashlytics-build.properties
230 | fabric.properties
231 |
232 | CLIP-repo/
233 | clip/
234 |
235 | pytorch-YOLOv4/
236 | yolov4.weights
237 | yolov4.cfg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Roboflow Object Tracking Example
2 |
3 | Object tracking using Roboflow Inference API and Zero-Shot (CLIP) Deep SORT. Read more in our
4 | [Zero-Shot Object Tracking announcement post](https://blog.roboflow.com/zero-shot-object-tracking/).
5 |
6 | 
7 |
8 | Example object tracking courtesy of the [Roboflow Universe public Aquarium model and dataset](https://universe.roboflow.com/brad-dwyer/aquarium-combined). You can adapt this to your own dataset on Roboflow or any pre-trained model from [Roboflow Universe](https://universe.roboflow.com).
9 |
10 | # Overview
11 |
12 | Object tracking involves following individual objects of interest across frames. It
13 | combines the output of an [object detection](https://blog.roboflow.com/object-detection) model
14 | with a secondary algorithm to determine which detections are identifying "the same"
15 | object over time.
16 |
17 | Previously, this required training a special classification model to differentiate
18 | the instances of each different class. In this repository, we have used
19 | [OpenAI's CLIP zero-shot image classifier](https://blog.roboflow.com/clip-model-eli5-beginner-guide/)
20 | to create a universal object tracking repository. All you need is a trained object
21 | detection model and CLIP handles the instance identification for the object tracking
22 | algorithm.
23 |
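Below is a minimal, conceptual sketch of that idea: crop each detection, embed the crop with CLIP, and hand the embeddings to Deep SORT as appearance features. It is not the repository's exact wiring (that lives in `clip_object_tracker.py` and `tools/generate_clip_detections.py`), and the `Detection` constructor's argument order is assumed here:

```python
# Conceptual sketch only. `frame` is an RGB numpy array (H, W, 3);
# boxes are (top-left x, top-left y, width, height).
import clip
import torch
from PIL import Image

from deep_sort import nn_matching
from deep_sort.detection import Detection  # argument order below is assumed
from deep_sort.tracker import Tracker

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.4)
tracker = Tracker(metric)


def embed_crops(frame, boxes_tlwh):
    """Return one CLIP embedding per detection crop."""
    crops = [preprocess(Image.fromarray(frame[int(y):int(y + h), int(x):int(x + w)]))
             for x, y, w, h in boxes_tlwh]
    with torch.no_grad():
        return model.encode_image(torch.stack(crops).to(device)).cpu().numpy()


def track_frame(frame, boxes_tlwh, confidences, class_nums):
    features = embed_crops(frame, boxes_tlwh)
    detections = [Detection(box, conf, cls, feat) for box, conf, cls, feat
                  in zip(boxes_tlwh, confidences, class_nums, features)]
    tracker.predict()           # propagate existing tracks one frame forward
    tracker.update(detections)  # associate by CLIP appearance, then IOU fallback
    return [(t.track_id, t.to_tlbr()) for t in tracker.tracks if t.is_confirmed()]
```
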
24 | # Getting Started
25 |
26 | Colab Tutorial Here:
27 |
28 |
29 |
30 | ## Training your model
31 |
32 | To use the Roboflow Inference API as your detection engine:
33 |
34 | Upload, annotate, and train your model on Roboflow with [Roboflow Train](https://docs.roboflow.com/train).
35 | Your model will be hosted on an inference URL.
36 |
37 | To use YOLOv7 as your detection engine:
38 |
39 | Follow Roboflow's [Train YOLOv7 on Custom Data Tutorial](https://blog.roboflow.com/yolov7-custom-dataset-training-tutorial/)
40 |
41 | The YOLOv7 implementation uses [this colab notebook](https://colab.research.google.com/drive/1X9A8odmK4k6l26NDviiT6dd6TgR-piOa)
42 |
43 | To use YOLOv5 as your detection engine:
44 |
45 | Follow Roboflow's [Train YOLOv5 on Custom Data Tutorial](https://blog.roboflow.com/how-to-train-yolov5-on-a-custom-dataset/)
46 |
47 | The YOLOv5 implementation uses [this colab notebook](https://colab.research.google.com/drive/1gDZ2xcTOgR39tGGs-EZ6i3RTs16wmzZQ)
48 |
49 | The YOLOv5 implementation is currently compatible with this commit hash of YOLOv5 `886f1c03d839575afecb059accf74296fad395b6`
50 |
51 | ## Performing Object Tracking
52 |
53 | ### Clone repositories
54 |
55 | ```
56 | git clone https://github.com/roboflow-ai/zero-shot-object-tracking
57 | cd zero-shot-object-tracking
58 | git clone https://github.com/openai/CLIP.git CLIP-repo
59 | cp -r ./CLIP-repo/clip ./clip  # Unix-based systems
60 | robocopy CLIP-repo/clip clip\  # Windows
61 | ```
62 |
63 | ### Install requirements (python 3.7+)
64 |
65 | ```bash
66 | pip install --upgrade pip
67 | pip install -r requirements.txt
68 | ```
69 |
70 | ### Install requirements (anaconda python 3.8)
71 | ```
72 | conda install pytorch torchvision torchaudio -c pytorch
73 | conda install ftfy regex tqdm requests pandas seaborn
74 | pip install opencv-python pycocotools tensorflow
75 | ```
76 |
77 | ### Run with Roboflow
78 |
79 | ```bash
80 |
81 | python clip_object_tracker.py --source data/video/fish.mp4 --url https://detect.roboflow.com/playing-cards-ow27d/1 --api_key ROBOFLOW_API_KEY --info
82 | ```
83 |
84 | **NOTE:** you must provide a valid API key from [Roboflow](https://docs.roboflow.com).
85 |
86 | ### Run with YOLOv7
87 | ```bash
88 |
89 | python clip_object_tracker.py --weights models/yolov7.pt --source data/video/fish.mp4 --detection-engine yolov7 --info
90 | ```
91 |
92 | ### Run with YOLOv5
93 | ```bash
94 |
95 | python clip_object_tracker.py --weights models/yolov5s.pt --source data/video/fish.mp4 --detection-engine yolov5 --info
96 | ```
97 |
98 | ### Run with YOLOv4
99 | To use YOLOv4 for object detection you will need pretrained weights (.weights file), a model config for your weights (.cfg), and a class names file (.names). Test weights can be found here https://github.com/AlexeyAB/darknet. [yolov4.weights](https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights) [yolov4.cfg](https://raw.githubusercontent.com/AlexeyAB/darknet/master/cfg/yolov4.cfg)
100 | ```
101 | python clip_object_tracker.py --weights yolov4.weights --cfg yolov4.cfg --names coco.names --source data/video/cars.mp4 --detection-engine yolov4 --info
102 | ```
103 | (by default, output will be in runs/detect/exp[num])
104 |
105 |
106 |
109 |
110 |
111 | ### Help
112 |
113 | ```bash
114 | python clip_object_tracker.py -h
115 | ```
116 | ```
117 | --weights WEIGHTS [WEIGHTS ...] model.pt path(s)
118 | --source SOURCE source (video/image)
119 | --img-size IMG_SIZE inference size (pixels)
120 | --confidence CONFIDENCE object confidence threshold
121 | --overlap OVERLAP IOU threshold for NMS
122 | --thickness THICKNESS Thickness of the bounding box strokes
123 | --device DEVICE cuda device, i.e. 0 or 0,1,2,3 or cpu
124 | --view-img display results
125 | --save-txt save results to *.txt
126 | --save-conf save confidences in --save-txt labels
127 | --classes CLASSES [CLASSES ...] filter by class: --class 0, or --class 0 2 3
128 | --agnostic-nms class-agnostic NMS
129 | --augment augmented inference
130 | --update update all models
131 | --project PROJECT save results to project/name
132 | --name NAME save results to project/name
133 | --exist-ok existing project/name ok, do not increment
134 | --nms_max_overlap Non-maxima suppression threshold: Maximum detection overlap.
135 | --max_cosine_distance Gating threshold for cosine distance metric (object appearance).
136 | --nn_budget NN_BUDGET Maximum size of the appearance descriptors gallery. If None, no budget is enforced.
137 | --api_key API_KEY Roboflow API Key.
138 | --url URL Roboflow Model URL.
139 | --info Print debugging info.
140 | --detection-engine Which engine you want to use for object detection (yolov7, yolov5, yolov4, roboflow).
141 | ```
142 | ## Acknowledgements
143 |
144 | Huge thanks to:
145 |
146 | - [yolov4-deepsort by theAIGuysCode](https://github.com/theAIGuysCode/yolov4-deepsort)
147 | - [yolov5 by ultralytics](https://github.com/ultralytics/yolov5)
148 | - [yolov7 by WongKinYiu](https://github.com/WongKinYiu/yolov7)
149 | - [Deep SORT Repository by nwojke](https://github.com/nwojke/deep_sort)
150 | - [OpenAI for being awesome](https://openai.com/blog/clip/)
151 |
--------------------------------------------------------------------------------
/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/cv.yml:
--------------------------------------------------------------------------------
1 | name: cv
2 |
3 | channels:
4 | - conda-forge
5 |
6 | dependencies:
7 | - python==3.8.6
8 | - pip
9 | - cython
10 | - matplotlib>=3.2.2
11 | - numpy>=1.18.5
12 | - PyYAML>=5.3
13 | - scipy>=1.4.1
14 | - tensorboard>=2.2
15 | - torchvision>=0.8.1
16 | - tqdm>=4.41.0
17 | - requests==2.26.0
18 | - pandas==1.3.2
19 | - seaborn>=0.11.0
20 | - ftfy==6.0.3
21 | - pillow
22 | - opencv
23 | - regex
24 | - pip:
25 | - lxml
26 | - torch>=1.7.0
--------------------------------------------------------------------------------
/data/coco.yaml:
--------------------------------------------------------------------------------
1 | # COCO 2017 dataset http://cocodataset.org
2 | # Train command: python train.py --data coco.yaml
3 | # Default dataset location is next to /yolov5:
4 | # /parent_folder
5 | # /coco
6 | # /yolov5
7 |
8 |
9 | # download command/URL (optional)
10 | download: bash data/scripts/get_coco.sh
11 |
12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13 | train: ../coco/train2017.txt # 118287 images
14 | val: ../coco/val2017.txt # 5000 images
15 | test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
16 |
17 | # number of classes
18 | nc: 80
19 |
20 | # class names
21 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
22 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
23 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
24 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
25 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
26 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
27 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
28 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
29 | 'hair drier', 'toothbrush']
30 |
31 | # Print classes
32 | # with open('data/coco.yaml') as f:
33 | # d = yaml.load(f, Loader=yaml.FullLoader) # dict
34 | # for i, x in enumerate(d['names']):
35 | # print(i, x)
36 |
--------------------------------------------------------------------------------
/data/coco128.yaml:
--------------------------------------------------------------------------------
1 | # COCO 2017 dataset http://cocodataset.org - first 128 training images
2 | # Train command: python train.py --data coco128.yaml
3 | # Default dataset location is next to /yolov5:
4 | # /parent_folder
5 | # /coco128
6 | # /yolov5
7 |
8 |
9 | # download command/URL (optional)
10 | download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
11 |
12 | # train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
13 | train: ../coco128/images/train2017/ # 128 images
14 | val: ../coco128/images/train2017/ # 128 images
15 |
16 | # number of classes
17 | nc: 80
18 |
19 | # class names
20 | names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
21 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
22 | 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
23 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
24 | 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
25 | 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
26 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
27 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
28 | 'hair drier', 'toothbrush']
29 |
--------------------------------------------------------------------------------
/data/hyp.finetune.yaml:
--------------------------------------------------------------------------------
1 | # Hyperparameters for VOC finetuning
2 | # python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4 |
5 |
6 | # Hyperparameter Evolution Results
7 | # Generations: 306
8 | # P R mAP.5 mAP.5:.95 box obj cls
9 | # Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146
10 |
11 | lr0: 0.0032
12 | lrf: 0.12
13 | momentum: 0.843
14 | weight_decay: 0.00036
15 | warmup_epochs: 2.0
16 | warmup_momentum: 0.5
17 | warmup_bias_lr: 0.05
18 | box: 0.0296
19 | cls: 0.243
20 | cls_pw: 0.631
21 | obj: 0.301
22 | obj_pw: 0.911
23 | iou_t: 0.2
24 | anchor_t: 2.91
25 | # anchors: 3.63
26 | fl_gamma: 0.0
27 | hsv_h: 0.0138
28 | hsv_s: 0.664
29 | hsv_v: 0.464
30 | degrees: 0.373
31 | translate: 0.245
32 | scale: 0.898
33 | shear: 0.602
34 | perspective: 0.0
35 | flipud: 0.00856
36 | fliplr: 0.5
37 | mosaic: 1.0
38 | mixup: 0.243
39 |
--------------------------------------------------------------------------------
/data/hyp.scratch.yaml:
--------------------------------------------------------------------------------
1 | # Hyperparameters for COCO training from scratch
2 | # python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
3 | # See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
4 |
5 |
6 | lr0: 0.01 # initial learning rate (SGD=1E-2, Adam=1E-3)
7 | lrf: 0.2 # final OneCycleLR learning rate (lr0 * lrf)
8 | momentum: 0.937 # SGD momentum/Adam beta1
9 | weight_decay: 0.0005 # optimizer weight decay 5e-4
10 | warmup_epochs: 3.0 # warmup epochs (fractions ok)
11 | warmup_momentum: 0.8 # warmup initial momentum
12 | warmup_bias_lr: 0.1 # warmup initial bias lr
13 | box: 0.05 # box loss gain
14 | cls: 0.5 # cls loss gain
15 | cls_pw: 1.0 # cls BCELoss positive_weight
16 | obj: 1.0 # obj loss gain (scale with pixels)
17 | obj_pw: 1.0 # obj BCELoss positive_weight
18 | iou_t: 0.20 # IoU training threshold
19 | anchor_t: 4.0 # anchor-multiple threshold
20 | # anchors: 3 # anchors per output layer (0 to ignore)
21 | fl_gamma: 0.0 # focal loss gamma (efficientDet default gamma=1.5)
22 | hsv_h: 0.015 # image HSV-Hue augmentation (fraction)
23 | hsv_s: 0.7 # image HSV-Saturation augmentation (fraction)
24 | hsv_v: 0.4 # image HSV-Value augmentation (fraction)
25 | degrees: 0.0 # image rotation (+/- deg)
26 | translate: 0.1 # image translation (+/- fraction)
27 | scale: 0.5 # image scale (+/- gain)
28 | shear: 0.0 # image shear (+/- deg)
29 | perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
30 | flipud: 0.0 # image flip up-down (probability)
31 | fliplr: 0.5 # image flip left-right (probability)
32 | mosaic: 1.0 # image mosaic (probability)
33 | mixup: 0.0 # image mixup (probability)
34 |
--------------------------------------------------------------------------------
/data/images/bus.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/bus.jpg
--------------------------------------------------------------------------------
/data/images/zidane.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/data/images/zidane.jpg
--------------------------------------------------------------------------------
/data/scripts/get_coco.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # COCO 2017 dataset http://cocodataset.org
3 | # Download command: bash data/scripts/get_coco.sh
4 | # Train command: python train.py --data coco.yaml
5 | # Default dataset location is next to /yolov5:
6 | # /parent_folder
7 | # /coco
8 | # /yolov5
9 |
10 | # Download/unzip labels
11 | d='../' # unzip directory
12 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
13 | f='coco2017labels.zip' # 68 MB
14 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
15 |
16 | # Download/unzip images
17 | d='../coco/images' # unzip directory
18 | url=http://images.cocodataset.org/zips/
19 | f1='train2017.zip' # 19G, 118k images
20 | f2='val2017.zip' # 1G, 5k images
21 | f3='test2017.zip' # 7G, 41k images (optional)
22 | for f in $f1 $f2; do
23 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
24 | done
25 |
--------------------------------------------------------------------------------
/data/scripts/get_voc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
3 | # Download command: bash data/scripts/get_voc.sh
4 | # Train command: python train.py --data voc.yaml
5 | # Default dataset location is next to /yolov5:
6 | # /parent_folder
7 | # /VOC
8 | # /yolov5
9 |
10 | start=$(date +%s)
11 | mkdir -p ../tmp
12 | cd ../tmp/
13 |
14 | # Download/unzip images and labels
15 | d='.' # unzip directory
16 | url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
17 | f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
18 | f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
19 | f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
20 | for f in $f1 $f2 $f3; do
21 | echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
22 | done
23 |
24 | end=$(date +%s)
25 | runtime=$((end - start))
26 | echo "Completed in" $runtime "seconds"
27 |
28 | echo "Splitting dataset..."
29 | python3 - "$@" <<END
[...]
89 | cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
90 |
91 | python3 - "$@" <<END
[...]
--------------------------------------------------------------------------------
/deep_sort/iou_matching.py:
--------------------------------------------------------------------------------
[...]
74 | if tracks[track_idx].time_since_update > 1:
75 | cost_matrix[row, :] = linear_assignment.INFTY_COST
76 | continue
77 |
78 | bbox = tracks[track_idx].to_tlwh()
79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
80 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
81 | return cost_matrix
82 |
--------------------------------------------------------------------------------
/deep_sort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 | 1: 3.8415,
13 | 2: 5.9915,
14 | 3: 7.8147,
15 | 4: 9.4877,
16 | 5: 11.070,
17 | 6: 12.592,
18 | 7: 14.067,
19 | 8: 15.507,
20 | 9: 16.919}
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1. / 20
53 | self._std_weight_velocity = 1. / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3]]
85 | covariance = np.diag(np.square(std))
86 | return mean, covariance
87 |
88 | def predict(self, mean, covariance):
89 | """Run Kalman filter prediction step.
90 |
91 | Parameters
92 | ----------
93 | mean : ndarray
94 | The 8 dimensional mean vector of the object state at the previous
95 | time step.
96 | covariance : ndarray
97 | The 8x8 dimensional covariance matrix of the object state at the
98 | previous time step.
99 |
100 | Returns
101 | -------
102 | (ndarray, ndarray)
103 | Returns the mean vector and covariance matrix of the predicted
104 | state. Unobserved velocities are initialized to 0 mean.
105 |
106 | """
107 | std_pos = [
108 | self._std_weight_position * mean[3],
109 | self._std_weight_position * mean[3],
110 | 1e-2,
111 | self._std_weight_position * mean[3]]
112 | std_vel = [
113 | self._std_weight_velocity * mean[3],
114 | self._std_weight_velocity * mean[3],
115 | 1e-5,
116 | self._std_weight_velocity * mean[3]]
117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 |
119 | mean = np.dot(self._motion_mat, mean)
120 | covariance = np.linalg.multi_dot((
121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122 |
123 | return mean, covariance
124 |
125 | def project(self, mean, covariance):
126 | """Project state distribution to measurement space.
127 |
128 | Parameters
129 | ----------
130 | mean : ndarray
131 | The state's mean vector (8 dimensional array).
132 | covariance : ndarray
133 | The state's covariance matrix (8x8 dimensional).
134 |
135 | Returns
136 | -------
137 | (ndarray, ndarray)
138 | Returns the projected mean and covariance matrix of the given state
139 | estimate.
140 |
141 | """
142 | std = [
143 | self._std_weight_position * mean[3],
144 | self._std_weight_position * mean[3],
145 | 1e-1,
146 | self._std_weight_position * mean[3]]
147 | innovation_cov = np.diag(np.square(std))
148 |
149 | mean = np.dot(self._update_mat, mean)
150 | covariance = np.linalg.multi_dot((
151 | self._update_mat, covariance, self._update_mat.T))
152 | return mean, covariance + innovation_cov
153 |
154 | def update(self, mean, covariance, measurement):
155 | """Run Kalman filter correction step.
156 |
157 | Parameters
158 | ----------
159 | mean : ndarray
160 | The predicted state's mean vector (8 dimensional).
161 | covariance : ndarray
162 | The state's covariance matrix (8x8 dimensional).
163 | measurement : ndarray
164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165 | is the center position, a the aspect ratio, and h the height of the
166 | bounding box.
167 |
168 | Returns
169 | -------
170 | (ndarray, ndarray)
171 | Returns the measurement-corrected state distribution.
172 |
173 | """
174 | projected_mean, projected_cov = self.project(mean, covariance)
175 |
176 | chol_factor, lower = scipy.linalg.cho_factor(
177 | projected_cov, lower=True, check_finite=False)
178 | kalman_gain = scipy.linalg.cho_solve(
179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180 | check_finite=False).T
181 | innovation = measurement - projected_mean
182 |
183 | new_mean = mean + np.dot(innovation, kalman_gain.T)
184 | new_covariance = covariance - np.linalg.multi_dot((
185 | kalman_gain, projected_cov, kalman_gain.T))
186 | return new_mean, new_covariance
187 |
188 | def gating_distance(self, mean, covariance, measurements,
189 | only_position=False):
190 | """Compute gating distance between state distribution and measurements.
191 |
192 | A suitable distance threshold can be obtained from `chi2inv95`. If
193 | `only_position` is False, the chi-square distribution has 4 degrees of
194 | freedom, otherwise 2.
195 |
196 | Parameters
197 | ----------
198 | mean : ndarray
199 | Mean vector over the state distribution (8 dimensional).
200 | covariance : ndarray
201 | Covariance of the state distribution (8x8 dimensional).
202 | measurements : ndarray
203 | An Nx4 dimensional matrix of N measurements, each in
204 | format (x, y, a, h) where (x, y) is the bounding box center
205 | position, a the aspect ratio, and h the height.
206 | only_position : Optional[bool]
207 | If True, distance computation is done with respect to the bounding
208 | box center position only.
209 |
210 | Returns
211 | -------
212 | ndarray
213 | Returns an array of length N, where the i-th element contains the
214 | squared Mahalanobis distance between (mean, covariance) and
215 | `measurements[i]`.
216 |
217 | """
218 | mean, covariance = self.project(mean, covariance)
219 | if only_position:
220 | mean, covariance = mean[:2], covariance[:2, :2]
221 | measurements = measurements[:, :2]
222 |
223 | cholesky_factor = np.linalg.cholesky(covariance)
224 | d = measurements - mean
225 | z = scipy.linalg.solve_triangular(
226 | cholesky_factor, d.T, lower=True, check_finite=False,
227 | overwrite_b=True)
228 | squared_maha = np.sum(z * z, axis=0)
229 | return squared_maha
230 |
--------------------------------------------------------------------------------
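A short usage sketch of `deep_sort/kalman_filter.py` above; the calls follow the code exactly, and the box values are made up for illustration:

```python
import numpy as np
from deep_sort.kalman_filter import KalmanFilter, chi2inv95

kf = KalmanFilter()

# Initial detection as (center x, center y, aspect ratio w/h, height)
mean, covariance = kf.initiate(np.array([320.0, 240.0, 0.5, 80.0]))

# Next frame: roll the constant-velocity model forward, then correct with a
# new measurement of (roughly) the same box.
mean, covariance = kf.predict(mean, covariance)
mean, covariance = kf.update(mean, covariance, np.array([324.0, 238.0, 0.5, 82.0]))

# Squared Mahalanobis distances to candidate measurements, gated via chi2inv95
candidates = np.array([[324.0, 238.0, 0.5, 82.0],
                       [600.0, 100.0, 0.4, 40.0]])
d2 = kf.gating_distance(mean, covariance, candidates)
feasible = d2 < chi2inv95[4]   # 4 degrees of freedom for (x, y, a, h)
```
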
/deep_sort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment
5 | from . import kalman_filter
6 |
7 |
8 | INFTY_COST = 1e+5
9 |
10 |
11 | def min_cost_matching(
12 | distance_metric, max_distance, tracks, detections, track_indices=None,
13 | detection_indices=None):
14 | """Solve linear assignment problem.
15 |
16 | Parameters
17 | ----------
18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
19 | The distance metric is given a list of tracks and detections as well as
20 | a list of N track indices and M detection indices. The metric should
21 | return the NxM dimensional cost matrix, where element (i, j) is the
22 | association cost between the i-th track in the given track indices and
23 | the j-th detection in the given detection_indices.
24 | max_distance : float
25 | Gating threshold. Associations with cost larger than this value are
26 | disregarded.
27 | tracks : List[track.Track]
28 | A list of predicted tracks at the current time step.
29 | detections : List[detection.Detection]
30 | A list of detections at the current time step.
31 | track_indices : List[int]
32 | List of track indices that maps rows in `cost_matrix` to tracks in
33 | `tracks` (see description above).
34 | detection_indices : List[int]
35 | List of detection indices that maps columns in `cost_matrix` to
36 | detections in `detections` (see description above).
37 |
38 | Returns
39 | -------
40 | (List[(int, int)], List[int], List[int])
41 | Returns a tuple with the following three entries:
42 | * A list of matched track and detection indices.
43 | * A list of unmatched track indices.
44 | * A list of unmatched detection indices.
45 |
46 | """
47 | if track_indices is None:
48 | track_indices = np.arange(len(tracks))
49 | if detection_indices is None:
50 | detection_indices = np.arange(len(detections))
51 |
52 | if len(detection_indices) == 0 or len(track_indices) == 0:
53 | return [], track_indices, detection_indices # Nothing to match.
54 |
55 | cost_matrix = distance_metric(
56 | tracks, detections, track_indices, detection_indices)
57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
58 |
59 | # Start of Edited Block by Maxwell Stone
60 | cost_matrix = np.nan_to_num(cost_matrix, copy=True, nan=0.0, posinf=None, neginf=None)
61 | # This code is not from the original Deep SORT algorithm; consider it first if tracking issues arise.
62 | # This line replaces NaN values, caused by incorrect CLIP detections, with 0's to stop the tracker from crashing.
63 | # **Warning: this workaround is not completely tested and may itself introduce issues.
64 | # End of Edited Block by Maxwell Stone
65 |
66 | indices = linear_sum_assignment(cost_matrix)
67 | indices = np.asarray(indices)
68 | indices = np.transpose(indices)
69 | matches, unmatched_tracks, unmatched_detections = [], [], []
70 | for col, detection_idx in enumerate(detection_indices):
71 | if col not in indices[:, 1]:
72 | unmatched_detections.append(detection_idx)
73 | for row, track_idx in enumerate(track_indices):
74 | if row not in indices[:, 0]:
75 | unmatched_tracks.append(track_idx)
76 | for row, col in indices:
77 | track_idx = track_indices[row]
78 | detection_idx = detection_indices[col]
79 | if cost_matrix[row, col] > max_distance:
80 | unmatched_tracks.append(track_idx)
81 | unmatched_detections.append(detection_idx)
82 | else:
83 | matches.append((track_idx, detection_idx))
84 | return matches, unmatched_tracks, unmatched_detections
85 |
86 |
87 | def matching_cascade(
88 | distance_metric, max_distance, cascade_depth, tracks, detections,
89 | track_indices=None, detection_indices=None):
90 | """Run matching cascade.
91 |
92 | Parameters
93 | ----------
94 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
95 | The distance metric is given a list of tracks and detections as well as
96 | a list of N track indices and M detection indices. The metric should
97 | return the NxM dimensional cost matrix, where element (i, j) is the
98 | association cost between the i-th track in the given track indices and
99 | the j-th detection in the given detection indices.
100 | max_distance : float
101 | Gating threshold. Associations with cost larger than this value are
102 | disregarded.
103 | cascade_depth: int
104 | The cascade depth, should be set to the maximum track age.
105 | tracks : List[track.Track]
106 | A list of predicted tracks at the current time step.
107 | detections : List[detection.Detection]
108 | A list of detections at the current time step.
109 | track_indices : Optional[List[int]]
110 | List of track indices that maps rows in `cost_matrix` to tracks in
111 | `tracks` (see description above). Defaults to all tracks.
112 | detection_indices : Optional[List[int]]
113 | List of detection indices that maps columns in `cost_matrix` to
114 | detections in `detections` (see description above). Defaults to all
115 | detections.
116 |
117 | Returns
118 | -------
119 | (List[(int, int)], List[int], List[int])
120 | Returns a tuple with the following three entries:
121 | * A list of matched track and detection indices.
122 | * A list of unmatched track indices.
123 | * A list of unmatched detection indices.
124 |
125 | """
126 | if track_indices is None:
127 | track_indices = list(range(len(tracks)))
128 | if detection_indices is None:
129 | detection_indices = list(range(len(detections)))
130 |
131 | unmatched_detections = detection_indices
132 | matches = []
133 | for level in range(cascade_depth):
134 | if len(unmatched_detections) == 0: # No detections left
135 | break
136 |
137 | track_indices_l = [
138 | k for k in track_indices
139 | if tracks[k].time_since_update == 1 + level
140 | ]
141 | if len(track_indices_l) == 0: # Nothing to match at this level
142 | continue
143 |
144 | matches_l, _, unmatched_detections = \
145 | min_cost_matching(
146 | distance_metric, max_distance, tracks, detections,
147 | track_indices_l, unmatched_detections)
148 | matches += matches_l
149 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
150 | return matches, unmatched_tracks, unmatched_detections
151 |
152 |
153 | def gate_cost_matrix(
154 | kf, cost_matrix, tracks, detections, track_indices, detection_indices,
155 | gated_cost=INFTY_COST, only_position=False):
156 | """Invalidate infeasible entries in cost matrix based on the state
157 | distributions obtained by Kalman filtering.
158 |
159 | Parameters
160 | ----------
161 | kf : The Kalman filter.
162 | cost_matrix : ndarray
163 | The NxM dimensional cost matrix, where N is the number of track indices
164 | and M is the number of detection indices, such that entry (i, j) is the
165 | association cost between `tracks[track_indices[i]]` and
166 | `detections[detection_indices[j]]`.
167 | tracks : List[track.Track]
168 | A list of predicted tracks at the current time step.
169 | detections : List[detection.Detection]
170 | A list of detections at the current time step.
171 | track_indices : List[int]
172 | List of track indices that maps rows in `cost_matrix` to tracks in
173 | `tracks` (see description above).
174 | detection_indices : List[int]
175 | List of detection indices that maps columns in `cost_matrix` to
176 | detections in `detections` (see description above).
177 | gated_cost : Optional[float]
178 | Entries in the cost matrix corresponding to infeasible associations are
179 | set to this value. Defaults to a very large value.
180 | only_position : Optional[bool]
181 | If True, only the x, y position of the state distribution is considered
182 | during gating. Defaults to False.
183 |
184 | Returns
185 | -------
186 | ndarray
187 | Returns the modified cost matrix.
188 |
189 | """
190 | gating_dim = 2 if only_position else 4
191 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
192 | measurements = np.asarray(
193 | [detections[i].to_xyah() for i in detection_indices])
194 | for row, track_idx in enumerate(track_indices):
195 | track = tracks[track_idx]
196 | gating_distance = kf.gating_distance(
197 | track.mean, track.covariance, measurements, only_position)
198 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
199 | return cost_matrix
200 |
--------------------------------------------------------------------------------
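To see how the `max_distance` gate interacts with the Hungarian assignment, here is a toy call to `min_cost_matching` above with a hard-coded cost matrix standing in for a real metric; the track and detection objects are placeholders, since only their indices are used:

```python
import numpy as np
from deep_sort import linear_assignment

# Fixed 3x2 cost matrix: 3 tracks (rows) vs. 2 detections (columns).
costs = np.array([[0.1, 0.9],
                  [0.8, 0.2],
                  [0.7, 0.6]])

def metric(tracks, detections, track_indices, detection_indices):
    return costs[np.ix_(track_indices, detection_indices)]

tracks = [object()] * 3      # placeholders; only their indices matter here
detections = [object()] * 2
matches, unmatched_tracks, unmatched_dets = linear_assignment.min_cost_matching(
    metric, max_distance=0.5, tracks=tracks, detections=detections)
# matches -> [(0, 0), (1, 1)]; track 2 stays unmatched under the 0.5 gate.
```
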
/deep_sort/nn_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | def _pdist(a, b):
6 | """Compute pair-wise squared distance between points in `a` and `b`.
7 |
8 | Parameters
9 | ----------
10 | a : array_like
11 | An NxM matrix of N samples of dimensionality M.
12 | b : array_like
13 | An LxM matrix of L samples of dimensionality M.
14 |
15 | Returns
16 | -------
17 | ndarray
18 | Returns a matrix of size len(a), len(b) such that element (i, j)
19 | contains the squared distance between `a[i]` and `b[j]`.
20 |
21 | """
22 | a, b = np.asarray(a), np.asarray(b)
23 | if len(a) == 0 or len(b) == 0:
24 | return np.zeros((len(a), len(b)))
25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27 | r2 = np.clip(r2, 0., float(np.inf))
28 | return r2
29 |
30 |
31 | def _cosine_distance(a, b, data_is_normalized=False):
32 | """Compute pair-wise cosine distance between points in `a` and `b`.
33 |
34 | Parameters
35 | ----------
36 | a : array_like
37 | An NxM matrix of N samples of dimensionality M.
38 | b : array_like
39 | An LxM matrix of L samples of dimensionality M.
40 | data_is_normalized : Optional[bool]
41 | If True, assumes rows in a and b are unit length vectors.
42 | Otherwise, a and b are explicitly normalized to length 1.
43 |
44 | Returns
45 | -------
46 | ndarray
47 | Returns a matrix of size len(a), len(b) such that element (i, j)
48 | contains the cosine distance between `a[i]` and `b[j]`.
49 |
50 | """
51 | if not data_is_normalized:
52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54 | return 1. - np.dot(a, b.T)
55 |
56 |
57 | def _nn_euclidean_distance(x, y):
58 | """ Helper function for nearest neighbor distance metric (Euclidean).
59 |
60 | Parameters
61 | ----------
62 | x : ndarray
63 | A matrix of N row-vectors (sample points).
64 | y : ndarray
65 | A matrix of M row-vectors (query points).
66 |
67 | Returns
68 | -------
69 | ndarray
70 | A vector of length M that contains for each entry in `y` the
71 | smallest Euclidean distance to a sample in `x`.
72 |
73 | """
74 | distances = _pdist(x, y)
75 | return np.maximum(0.0, distances.min(axis=0))
76 |
77 |
78 | def _nn_cosine_distance(x, y):
79 | """ Helper function for nearest neighbor distance metric (cosine).
80 |
81 | Parameters
82 | ----------
83 | x : ndarray
84 | A matrix of N row-vectors (sample points).
85 | y : ndarray
86 | A matrix of M row-vectors (query points).
87 |
88 | Returns
89 | -------
90 | ndarray
91 | A vector of length M that contains for each entry in `y` the
92 | smallest cosine distance to a sample in `x`.
93 |
94 | """
95 | distances = _cosine_distance(x, y)
96 | return distances.min(axis=0)
97 |
98 |
99 | class NearestNeighborDistanceMetric(object):
100 | """
101 | A nearest neighbor distance metric that, for each target, returns
102 | the closest distance to any sample that has been observed so far.
103 |
104 | Parameters
105 | ----------
106 | metric : str
107 | Either "euclidean" or "cosine".
108 | matching_threshold: float
109 | The matching threshold. Samples with larger distance are considered an
110 | invalid match.
111 | budget : Optional[int]
112 | If not None, fix samples per class to at most this number. Removes
113 | the oldest samples when the budget is reached.
114 |
115 | Attributes
116 | ----------
117 | samples : Dict[int -> List[ndarray]]
118 | A dictionary that maps from target identities to the list of samples
119 | that have been observed so far.
120 |
121 | """
122 |
123 | def __init__(self, metric, matching_threshold, budget=None):
124 |
125 |
126 | if metric == "euclidean":
127 | self._metric = _nn_euclidean_distance
128 | elif metric == "cosine":
129 | self._metric = _nn_cosine_distance
130 | else:
131 | raise ValueError(
132 | "Invalid metric; must be either 'euclidean' or 'cosine'")
133 | self.matching_threshold = matching_threshold
134 | self.budget = budget
135 | self.samples = {}
136 |
137 | def partial_fit(self, features, targets, active_targets):
138 | """Update the distance metric with new data.
139 |
140 | Parameters
141 | ----------
142 | features : ndarray
143 | An NxM matrix of N features of dimensionality M.
144 | targets : ndarray
145 | An integer array of associated target identities.
146 | active_targets : List[int]
147 | A list of targets that are currently present in the scene.
148 |
149 | """
150 | for feature, target in zip(features, targets):
151 | self.samples.setdefault(target, []).append(feature)
152 | if self.budget is not None:
153 | self.samples[target] = self.samples[target][-self.budget:]
154 | self.samples = {k: self.samples[k] for k in active_targets}
155 |
156 | def distance(self, features, targets):
157 | """Compute distance between features and targets.
158 |
159 | Parameters
160 | ----------
161 | features : ndarray
162 | An NxM matrix of N features of dimensionality M.
163 | targets : List[int]
164 | A list of targets to match the given `features` against.
165 |
166 | Returns
167 | -------
168 | ndarray
169 | Returns a cost matrix of shape len(targets), len(features), where
170 | element (i, j) contains the closest squared distance between
171 | `targets[i]` and `features[j]`.
172 |
173 | """
174 | cost_matrix = np.zeros((len(targets), len(features)))
175 | for i, target in enumerate(targets):
176 | cost_matrix[i, :] = self._metric(self.samples[target], features)
177 | return cost_matrix
178 |
--------------------------------------------------------------------------------
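A usage sketch for the metric above: `partial_fit` grows the per-target feature galleries and `distance` scores new features against them. The 512-dimensional vectors are random stand-ins for real appearance embeddings:

```python
import numpy as np
from deep_sort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.4, budget=30)

# Store one appearance feature for each of two confirmed targets (ids 1 and 2).
gallery = np.random.rand(2, 512).astype(np.float32)
metric.partial_fit(gallery, targets=np.array([1, 2]), active_targets=[1, 2])

# Score three new detection features against both targets: the result is a
# (num targets) x (num features) cost matrix of nearest-neighbor cosine distances.
queries = np.random.rand(3, 512).astype(np.float32)
cost = metric.distance(queries, targets=[1, 2])   # shape (2, 3)
```
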
/deep_sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import cv2
4 |
5 |
6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None):
7 | """Suppress overlapping detections.
8 |
9 | Original code from [1]_ has been adapted to include confidence score.
10 |
11 | .. [1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> classes = [d.classes for d in detections]
19 | >>> scores = [d.confidence for d in detections]
20 | >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
21 | >>> detections = [detections[i] for i in indices]
22 |
23 | Parameters
24 | ----------
25 | boxes : ndarray
26 | Array of ROIs (x, y, width, height).
27 | max_bbox_overlap : float
28 | ROIs that overlap more than this values are suppressed.
29 | scores : Optional[array_like]
30 | Detector confidence score.
31 |
32 | Returns
33 | -------
34 | List[int]
35 | Returns indices of detections that have survived non-maxima suppression.
36 |
37 | """
38 | if len(boxes) == 0:
39 | return []
40 |
41 | boxes = boxes.astype(float)
42 | pick = []
43 |
44 | x1 = boxes[:, 0]
45 | y1 = boxes[:, 1]
46 | x2 = boxes[:, 2] + boxes[:, 0]
47 | y2 = boxes[:, 3] + boxes[:, 1]
48 |
49 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
50 | if scores is not None:
51 | idxs = np.argsort(scores)
52 | else:
53 | idxs = np.argsort(y2)
54 |
55 | while len(idxs) > 0:
56 | last = len(idxs) - 1
57 | i = idxs[last]
58 | pick.append(i)
59 |
60 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
61 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
62 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
63 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
64 |
65 | w = np.maximum(0, xx2 - xx1 + 1)
66 | h = np.maximum(0, yy2 - yy1 + 1)
67 |
68 | overlap = (w * h) / area[idxs[:last]]
69 |
70 | idxs = np.delete(
71 | idxs, np.concatenate(
72 | ([last], np.where(overlap > max_bbox_overlap)[0])))
73 |
74 | return pick
75 |
--------------------------------------------------------------------------------
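A small worked example of the suppression above; note that `classes` is accepted but, as the code shows, not used in the overlap test:

```python
import numpy as np
from deep_sort.preprocessing import non_max_suppression

boxes = np.array([[10, 10, 50, 80],     # (x, y, width, height)
                  [12, 12, 50, 80],     # heavily overlaps the first box
                  [200, 40, 60, 90]], dtype=float)
classes = np.array([0, 0, 1])
scores = np.array([0.9, 0.6, 0.8])

keep = non_max_suppression(boxes, classes, max_bbox_overlap=0.7, scores=scores)
# keep -> [0, 2]: the lower-scoring duplicate of box 0 is suppressed.
```
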
/deep_sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int
53 | Total number of measurement updates.
54 | age : int
55 | Total number of frames since first occurrence.
56 | time_since_update : int
57 | Total number of frames since last measurement update.
58 | state : TrackState
59 | The current track state.
60 | features : List[ndarray]
61 | A cache of features. On each measurement update, the associated feature
62 | vector is added to this list.
63 |
64 | """
65 |
66 | def __init__(self, mean, covariance, track_id, n_init, max_age,
67 | feature=None, class_num=None):
68 | self.mean = mean
69 | self.covariance = covariance
70 | self.track_id = track_id
71 | self.hits = 1
72 | self.age = 1
73 | self.time_since_update = 0
74 |
75 | self.state = TrackState.Tentative
76 | self.features = []
77 | if feature is not None:
78 | self.features.append(feature)
79 |
80 | self._n_init = n_init
81 | self._max_age = max_age
82 | self.class_num = class_num
83 |
84 | def to_tlwh(self):
85 | """Get current position in bounding box format `(top left x, top left y,
86 | width, height)`.
87 |
88 | Returns
89 | -------
90 | ndarray
91 | The bounding box.
92 |
93 | """
94 | ret = self.mean[:4].copy()
95 | ret[2] *= ret[3]
96 | ret[:2] -= ret[2:] / 2
97 | return ret
98 |
99 | def to_tlbr(self):
100 | """Get current position in bounding box format `(min x, miny, max x,
101 | max y)`.
102 |
103 | Returns
104 | -------
105 | ndarray
106 | The bounding box.
107 |
108 | """
109 | ret = self.to_tlwh()
110 | ret[2:] = ret[:2] + ret[2:]
111 | return ret
112 |
113 | def predict(self, kf):
114 | """Propagate the state distribution to the current time step using a
115 | Kalman filter prediction step.
116 |
117 | Parameters
118 | ----------
119 | kf : kalman_filter.KalmanFilter
120 | The Kalman filter.
121 |
122 | """
123 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
124 | self.age += 1
125 | self.time_since_update += 1
126 |
127 | def update(self, kf, detection):
128 | """Perform Kalman filter measurement update step and update the feature
129 | cache.
130 |
131 | Parameters
132 | ----------
133 | kf : kalman_filter.KalmanFilter
134 | The Kalman filter.
135 | detection : Detection
136 | The associated detection.
137 |
138 | """
139 | self.mean, self.covariance = kf.update(
140 | self.mean, self.covariance, detection.to_xyah())
141 | self.features.append(detection.feature)
142 |
143 | self.hits += 1
144 | self.time_since_update = 0
145 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
146 | self.state = TrackState.Confirmed
147 |
148 | def mark_missed(self):
149 | """Mark this track as missed (no association at the current time step).
150 | """
151 | if self.state == TrackState.Tentative:
152 | self.state = TrackState.Deleted
153 | elif self.time_since_update > self._max_age:
154 | self.state = TrackState.Deleted
155 |
156 | def is_tentative(self):
157 | """Returns True if this track is tentative (unconfirmed).
158 | """
159 | return self.state == TrackState.Tentative
160 |
161 | def is_confirmed(self):
162 | """Returns True if this track is confirmed."""
163 | return self.state == TrackState.Confirmed
164 |
165 | def is_deleted(self):
166 | """Returns True if this track is dead and should be deleted."""
167 | return self.state == TrackState.Deleted
168 |
--------------------------------------------------------------------------------
/deep_sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import kalman_filter
5 | from . import linear_assignment
6 | from . import iou_matching
7 | from .track import Track
8 |
9 |
10 | class Tracker:
11 | """
12 | This is the multi-target tracker.
13 |
14 | Parameters
15 | ----------
16 | metric : nn_matching.NearestNeighborDistanceMetric
17 | A distance metric for measurement-to-track association.
18 | max_age : int
19 | Maximum number of consecutive misses before a track is deleted.
20 | n_init : int
21 | Number of consecutive detections before the track is confirmed. The
22 | track state is set to `Deleted` if a miss occurs within the first
23 | `n_init` frames.
24 |
25 | Attributes
26 | ----------
27 | metric : nn_matching.NearestNeighborDistanceMetric
28 | The distance metric used for measurement to track association.
29 | max_age : int
30 | Maximum number of consecutive misses before a track is deleted.
31 | n_init : int
32 | Number of frames that a track remains in initialization phase.
33 | kf : kalman_filter.KalmanFilter
34 | A Kalman filter to filter target trajectories in image space.
35 | tracks : List[Track]
36 | The list of active tracks at the current time step.
37 |
38 | """
39 |
40 | def __init__(self, metric, max_iou_distance=0.7, max_age=60, n_init=3):
41 | self.metric = metric
42 | self.max_iou_distance = max_iou_distance
43 | self.max_age = max_age
44 | self.n_init = n_init
45 |
46 | self.kf = kalman_filter.KalmanFilter()
47 | self.tracks = []
48 | self._next_id = 1
49 |
50 | def predict(self):
51 | """Propagate track state distributions one time step forward.
52 |
53 | This function should be called once every time step, before `update`.
54 | """
55 | for track in self.tracks:
56 | track.predict(self.kf)
57 |
58 | def update(self, detections):
59 | """Perform measurement update and track management.
60 |
61 | Parameters
62 | ----------
63 | detections : List[deep_sort.detection.Detection]
64 | A list of detections at the current time step.
65 |
66 | """
67 | # Run matching cascade.
68 | matches, unmatched_tracks, unmatched_detections = \
69 | self._match(detections)
70 |
71 | # Update track set.
72 | for track_idx, detection_idx in matches:
73 | self.tracks[track_idx].update(
74 | self.kf, detections[detection_idx])
75 | for track_idx in unmatched_tracks:
76 | self.tracks[track_idx].mark_missed()
77 | for detection_idx in unmatched_detections:
78 | self._initiate_track(detections[detection_idx])
79 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
80 |
81 | # Update distance metric.
82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83 | features, targets = [], []
84 | for track in self.tracks:
85 | if not track.is_confirmed():
86 | continue
87 | features += track.features
88 | targets += [track.track_id for _ in track.features]
89 | track.features = []
90 | self.metric.partial_fit(
91 | np.asarray(features), np.asarray(targets), active_targets)
92 |
93 | def _match(self, detections):
94 |
95 | def gated_metric(tracks, dets, track_indices, detection_indices):
96 | features = np.array([dets[i].feature for i in detection_indices])
97 | targets = np.array([tracks[i].track_id for i in track_indices])
98 | cost_matrix = self.metric.distance(features, targets)
99 | cost_matrix = linear_assignment.gate_cost_matrix(
100 | self.kf, cost_matrix, tracks, dets, track_indices,
101 | detection_indices)
102 |
103 | return cost_matrix
104 |
105 | # Split track set into confirmed and unconfirmed tracks.
106 | confirmed_tracks = [
107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108 | unconfirmed_tracks = [
109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110 |
111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \
113 | linear_assignment.matching_cascade(
114 | gated_metric, self.metric.matching_threshold, self.max_age,
115 | self.tracks, detections, confirmed_tracks)
116 |
117 | # Associate remaining tracks together with unconfirmed tracks using IOU.
118 | iou_track_candidates = unconfirmed_tracks + [
119 | k for k in unmatched_tracks_a if
120 | self.tracks[k].time_since_update == 1]
121 | unmatched_tracks_a = [
122 | k for k in unmatched_tracks_a if
123 | self.tracks[k].time_since_update != 1]
124 | matches_b, unmatched_tracks_b, unmatched_detections = \
125 | linear_assignment.min_cost_matching(
126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127 | detections, iou_track_candidates, unmatched_detections)
128 |
129 | matches = matches_a + matches_b
130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131 | return matches, unmatched_tracks, unmatched_detections
132 |
133 | def _initiate_track(self, detection):
134 | mean, covariance = self.kf.initiate(detection.to_xyah())
135 | class_num = detection.class_num
136 | self.tracks.append(Track(
137 | mean, covariance, self._next_id, self.n_init, self.max_age,
138 | detection.feature, class_num))
139 | self._next_id += 1
140 |
--------------------------------------------------------------------------------
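
Note on usage: the Tracker above expects predict() to be called once per frame before update(). A minimal per-frame driver loop might look like the sketch below; it assumes the standard deep_sort constructor of nn_matching.NearestNeighborDistanceMetric and the Track helpers to_tlbr() / class_num, and leaves the construction of Detection objects (boxes plus appearance features) to the caller.

    from deep_sort import nn_matching
    from deep_sort.tracker import Tracker

    # assumed thresholds; tune per application
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", 0.4, budget=None)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=60, n_init=3)

    frames_of_detections = []  # fill with one List[Detection] per video frame
    for detections in frames_of_detections:
        tracker.predict()                  # propagate every track one step forward
        tracker.update(detections)         # match, update, spawn and prune tracks
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue                   # skip tentative or coasting tracks
            print(track.track_id, track.class_num, track.to_tlbr())
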
/example/video/fish.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/example/video/fish.mp4
--------------------------------------------------------------------------------
/model_data/mars-small128.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/model_data/mars-small128.pb
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/models/__init__.py
--------------------------------------------------------------------------------
/models/experimental.py:
--------------------------------------------------------------------------------
1 | # This file contains experimental modules
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 |
7 | from models.common import Conv, DWConv
8 | from utils.google_utils import attempt_download
9 |
10 |
11 | class CrossConv(nn.Module):
12 | # Cross Convolution Downsample
13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
15 | super(CrossConv, self).__init__()
16 | c_ = int(c2 * e) # hidden channels
17 | self.cv1 = Conv(c1, c_, (1, k), (1, s))
18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
19 | self.add = shortcut and c1 == c2
20 |
21 | def forward(self, x):
22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
23 |
24 |
25 | class Sum(nn.Module):
26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
27 | def __init__(self, n, weight=False): # n: number of inputs
28 | super(Sum, self).__init__()
29 | self.weight = weight # apply weights boolean
30 | self.iter = range(n - 1) # iter object
31 | if weight:
32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
33 |
34 | def forward(self, x):
35 | y = x[0] # no weight
36 | if self.weight:
37 | w = torch.sigmoid(self.w) * 2
38 | for i in self.iter:
39 | y = y + x[i + 1] * w[i]
40 | else:
41 | for i in self.iter:
42 | y = y + x[i + 1]
43 | return y
44 |
45 |
46 | class GhostConv(nn.Module):
47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
49 | super(GhostConv, self).__init__()
50 | c_ = c2 // 2 # hidden channels
51 | self.cv1 = Conv(c1, c_, k, s, None, g, act)
52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
53 |
54 | def forward(self, x):
55 | y = self.cv1(x)
56 | return torch.cat([y, self.cv2(y)], 1)
57 |
58 |
59 | class GhostBottleneck(nn.Module):
60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
61 | def __init__(self, c1, c2, k, s):
62 | super(GhostBottleneck, self).__init__()
63 | c_ = c2 // 2
64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
69 |
70 | def forward(self, x):
71 | return self.conv(x) + self.shortcut(x)
72 |
73 |
74 | class MixConv2d(nn.Module):
75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
77 | super(MixConv2d, self).__init__()
78 | groups = len(k)
79 | if equal_ch: # equal c_ per group
80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
82 | else: # equal weight.numel() per group
83 | b = [c2] + [0] * groups
84 | a = np.eye(groups + 1, groups, k=-1)
85 | a -= np.roll(a, 1, axis=1)
86 | a *= np.array(k) ** 2
87 | a[0] = 1
88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
89 |
90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
91 | self.bn = nn.BatchNorm2d(c2)
92 | self.act = nn.LeakyReLU(0.1, inplace=True)
93 |
94 | def forward(self, x):
95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
96 |
97 |
98 | class Ensemble(nn.ModuleList):
99 | # Ensemble of models
100 | def __init__(self):
101 | super(Ensemble, self).__init__()
102 |
103 | def forward(self, x, augment=False):
104 | y = []
105 | for module in self:
106 | y.append(module(x, augment)[0])
107 | # y = torch.stack(y).max(0)[0] # max ensemble
108 | # y = torch.cat(y, 1) # nms ensemble
109 | y = torch.stack(y).mean(0) # mean ensemble
110 | return y, None # inference, train output
111 |
112 |
113 | def attempt_load(weights, map_location=None):
114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
115 | model = Ensemble()
116 | for w in weights if isinstance(weights, list) else [weights]:
117 | attempt_download(w)
118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model
119 |
120 | # Compatibility updates
121 | for m in model.modules():
122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
123 | m.inplace = True # pytorch 1.7.0 compatibility
124 | elif type(m) is Conv:
125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
126 |
127 | if len(model) == 1:
128 | return model[-1] # return model
129 | else:
130 | print('Ensemble created with %s\n' % weights)
131 | for k in ['names', 'stride']:
132 | setattr(model, k, getattr(model[-1], k))
133 | return model # return ensemble
134 |
--------------------------------------------------------------------------------
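
A brief usage sketch for attempt_load above: a single checkpoint returns the fused FP32 model itself, while a list of checkpoints returns an Ensemble whose outputs are averaged. The weight filename here is an assumption; any YOLOv5 checkpoint saved with a 'model' key works.

    import torch
    from models.experimental import attempt_load

    model = attempt_load('yolov5s.pt', map_location=torch.device('cpu'))   # single model
    # model = attempt_load(['yolov5s.pt', 'yolov5m.pt'])                   # mean ensemble

    img = torch.zeros(1, 3, 640, 640)      # dummy NCHW input
    with torch.no_grad():
        pred = model(img)[0]               # raw predictions before NMS
    print(pred.shape, model.names[:3])
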
/models/export.py:
--------------------------------------------------------------------------------
1 | """Exports a YOLOv5 *.pt model to TorchScript, ONNX and CoreML formats
2 |
3 | Usage:
4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
5 | """
6 |
7 | import argparse
8 | import sys
9 | import time
10 |
11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories
12 |
13 | import torch
14 | import torch.nn as nn
15 |
16 | import models
17 | from models.experimental import attempt_load
18 | from utils.activations import Hardswish, SiLU
19 | from utils.general import set_logging, check_img_size
20 |
21 | if __name__ == '__main__':
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size')
26 | opt = parser.parse_args()
27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
28 | print(opt)
29 | set_logging()
30 | t = time.time()
31 |
32 | # Load PyTorch model
33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model
34 | labels = model.names
35 |
36 | # Checks
37 | gs = int(max(model.stride)) # grid size (max stride)
38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples
39 |
40 | # Input
41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection
42 |
43 | # Update model
44 | for k, m in model.named_modules():
45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
46 | if isinstance(m, models.common.Conv): # assign export-friendly activations
47 | if isinstance(m.act, nn.Hardswish):
48 | m.act = Hardswish()
49 | elif isinstance(m.act, nn.SiLU):
50 | m.act = SiLU()
51 | # elif isinstance(m, models.yolo.Detect):
52 | # m.forward = m.forward_export # assign forward (optional)
53 | model.model[-1].export = True # set Detect() layer export=True
54 | y = model(img) # dry run
55 |
56 | # TorchScript export
57 | try:
58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__)
59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename
60 | ts = torch.jit.trace(model, img)
61 | ts.save(f)
62 | print('TorchScript export success, saved as %s' % f)
63 | except Exception as e:
64 | print('TorchScript export failure: %s' % e)
65 |
66 | # ONNX export
67 | try:
68 | import onnx
69 |
70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
71 | f = opt.weights.replace('.pt', '.onnx') # filename
72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
73 | output_names=['classes', 'boxes'] if y is None else ['output'])
74 |
75 | # Checks
76 | onnx_model = onnx.load(f) # load onnx model
77 | onnx.checker.check_model(onnx_model) # check onnx model
78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
79 | print('ONNX export success, saved as %s' % f)
80 | except Exception as e:
81 | print('ONNX export failure: %s' % e)
82 |
83 | # CoreML export
84 | try:
85 | import coremltools as ct
86 |
87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
88 | # convert model from torchscript and apply pixel scaling as per detect.py
89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
90 | f = opt.weights.replace('.pt', '.mlmodel') # filename
91 | model.save(f)
92 | print('CoreML export success, saved as %s' % f)
93 | except Exception as e:
94 | print('CoreML export failure: %s' % e)
95 |
96 | # Finish
97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
98 |
--------------------------------------------------------------------------------
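
After running the export script, the ONNX file can be sanity-checked with onnxruntime. This is a sketch only: onnxruntime is not listed in requirements.txt, and the output filename assumes the default --weights value.

    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession('yolov5s.onnx')
    dummy = np.zeros((1, 3, 640, 640), dtype=np.float32)
    outputs = session.run(None, {'images': dummy})   # 'images' matches input_names above
    print([o.shape for o in outputs])
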
/models/hub/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3-SPP head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, SPP, [512, [5, 9, 13]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/models/hub/yolov3-tiny.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,14, 23,27, 37,58] # P4/16
9 | - [81,82, 135,169, 344,319] # P5/32
10 |
11 | # YOLOv3-tiny backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [[-1, 1, Conv, [16, 3, 1]], # 0
15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16 | [-1, 1, Conv, [32, 3, 1]],
17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
18 | [-1, 1, Conv, [64, 3, 1]],
19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
20 | [-1, 1, Conv, [128, 3, 1]],
21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
22 | [-1, 1, Conv, [256, 3, 1]],
23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
24 | [-1, 1, Conv, [512, 3, 1]],
25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11
26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
27 | ]
28 |
29 | # YOLOv3-tiny head
30 | head:
31 | [[-1, 1, Conv, [1024, 3, 1]],
32 | [-1, 1, Conv, [256, 1, 1]],
33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
34 |
35 | [-2, 1, Conv, [128, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
39 |
40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
41 | ]
42 |
--------------------------------------------------------------------------------
/models/hub/yolov3.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3 head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, Conv, [512, [1, 1]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/models/hub/yolov5-fpn.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, Bottleneck, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 6, BottleneckCSP, [1024]], # 9
25 | ]
26 |
27 | # YOLOv5 FPN head
28 | head:
29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
30 |
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
35 |
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 1, Conv, [256, 1, 1]],
39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
40 |
41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42 | ]
43 |
--------------------------------------------------------------------------------
/models/hub/yolov5-panet.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 PANet head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5l.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5m.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/models/yolov5x.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.33 # model depth multiple
4 | width_multiple: 1.25 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
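
The four yolov5*.yaml files above share the same layer graph and differ only in depth_multiple and width_multiple. A small sketch of the scaling rule (as applied by parse_model in models/yolo.py, not reproduced in this listing) shows how those two numbers turn one spec into the s/m/l/x variants:

    import math

    def scale_depth(n, depth_multiple):             # number of module repeats
        return max(round(n * depth_multiple), 1) if n > 1 else n

    def scale_width(c, width_multiple, divisor=8):  # output channels, kept divisible by 8
        return math.ceil(c * width_multiple / divisor) * divisor

    # yolov5s (0.33, 0.50): a 9-repeat BottleneckCSP runs 3 times and 1024 channels become 512
    print(scale_depth(9, 0.33), scale_width(1024, 0.50))   # -> 3 512
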
/requirements.txt:
--------------------------------------------------------------------------------
1 | # pip install -r requirements.txt
2 |
3 | # base ----------------------------------------
4 | Cython
5 | matplotlib>=3.2.2
6 | numpy>=1.18.5
7 | opencv-python>=4.1.2
8 | Pillow
9 | PyYAML>=5.3
10 | scipy>=1.4.1
11 | tensorboard>=2.2
12 | torch>=1.7.0
13 | torchvision>=0.8.1
14 | tqdm>=4.41.0
15 | requests==2.26.0
16 | pandas==1.3.2
17 | 
18 | # plotting ------------------------------------
19 | seaborn>=0.11.0
20 | 
21 | # clip
22 | ftfy==6.0.3
23 | regex==2.5.86
--------------------------------------------------------------------------------
/tool/region_loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from tool.torch_utils import *  # re-exports torch, math, time, Variable and bbox_ious
4 | from tool.utils import bbox_iou
5 |
6 | def build_targets(pred_boxes, target, anchors, num_anchors, num_classes, nH, nW, noobject_scale, object_scale,
7 | sil_thresh, seen):
8 | nB = target.size(0)
9 | nA = num_anchors
10 | nC = num_classes
11 |     anchor_step = len(anchors) // num_anchors  # integer division: anchor_step indexes into the flat anchors list
12 | conf_mask = torch.ones(nB, nA, nH, nW) * noobject_scale
13 | coord_mask = torch.zeros(nB, nA, nH, nW)
14 | cls_mask = torch.zeros(nB, nA, nH, nW)
15 | tx = torch.zeros(nB, nA, nH, nW)
16 | ty = torch.zeros(nB, nA, nH, nW)
17 | tw = torch.zeros(nB, nA, nH, nW)
18 | th = torch.zeros(nB, nA, nH, nW)
19 | tconf = torch.zeros(nB, nA, nH, nW)
20 | tcls = torch.zeros(nB, nA, nH, nW)
21 |
22 | nAnchors = nA * nH * nW
23 | nPixels = nH * nW
24 | for b in range(nB):
25 | cur_pred_boxes = pred_boxes[b * nAnchors:(b + 1) * nAnchors].t()
26 | cur_ious = torch.zeros(nAnchors)
27 | for t in range(50):
28 | if target[b][t * 5 + 1] == 0:
29 | break
30 | gx = target[b][t * 5 + 1] * nW
31 | gy = target[b][t * 5 + 2] * nH
32 | gw = target[b][t * 5 + 3] * nW
33 | gh = target[b][t * 5 + 4] * nH
34 | cur_gt_boxes = torch.FloatTensor([gx, gy, gw, gh]).repeat(nAnchors, 1).t()
35 | cur_ious = torch.max(cur_ious, bbox_ious(cur_pred_boxes, cur_gt_boxes, x1y1x2y2=False))
36 | conf_mask[b][cur_ious > sil_thresh] = 0
37 | if seen < 12800:
38 | if anchor_step == 4:
39 | tx = torch.FloatTensor(anchors).view(nA, anchor_step).index_select(1, torch.LongTensor([2])).view(1, nA, 1,
40 | 1).repeat(
41 | nB, 1, nH, nW)
42 | ty = torch.FloatTensor(anchors).view(num_anchors, anchor_step).index_select(1, torch.LongTensor([2])).view(
43 | 1, nA, 1, 1).repeat(nB, 1, nH, nW)
44 | else:
45 | tx.fill_(0.5)
46 | ty.fill_(0.5)
47 | tw.zero_()
48 | th.zero_()
49 | coord_mask.fill_(1)
50 |
51 | nGT = 0
52 | nCorrect = 0
53 | for b in range(nB):
54 | for t in range(50):
55 | if target[b][t * 5 + 1] == 0:
56 | break
57 | nGT = nGT + 1
58 | best_iou = 0.0
59 | best_n = -1
60 | min_dist = 10000
61 | gx = target[b][t * 5 + 1] * nW
62 | gy = target[b][t * 5 + 2] * nH
63 | gi = int(gx)
64 | gj = int(gy)
65 | gw = target[b][t * 5 + 3] * nW
66 | gh = target[b][t * 5 + 4] * nH
67 | gt_box = [0, 0, gw, gh]
68 | for n in range(nA):
69 | aw = anchors[anchor_step * n]
70 | ah = anchors[anchor_step * n + 1]
71 | anchor_box = [0, 0, aw, ah]
72 | iou = bbox_iou(anchor_box, gt_box, x1y1x2y2=False)
73 | if anchor_step == 4:
74 | ax = anchors[anchor_step * n + 2]
75 | ay = anchors[anchor_step * n + 3]
76 | dist = pow(((gi + ax) - gx), 2) + pow(((gj + ay) - gy), 2)
77 | if iou > best_iou:
78 | best_iou = iou
79 | best_n = n
80 | elif anchor_step == 4 and iou == best_iou and dist < min_dist:
81 | best_iou = iou
82 | best_n = n
83 | min_dist = dist
84 |
85 | gt_box = [gx, gy, gw, gh]
86 | pred_box = pred_boxes[b * nAnchors + best_n * nPixels + gj * nW + gi]
87 |
88 | coord_mask[b][best_n][gj][gi] = 1
89 | cls_mask[b][best_n][gj][gi] = 1
90 | conf_mask[b][best_n][gj][gi] = object_scale
91 | tx[b][best_n][gj][gi] = target[b][t * 5 + 1] * nW - gi
92 | ty[b][best_n][gj][gi] = target[b][t * 5 + 2] * nH - gj
93 | tw[b][best_n][gj][gi] = math.log(gw / anchors[anchor_step * best_n])
94 | th[b][best_n][gj][gi] = math.log(gh / anchors[anchor_step * best_n + 1])
95 | iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False) # best_iou
96 | tconf[b][best_n][gj][gi] = iou
97 | tcls[b][best_n][gj][gi] = target[b][t * 5]
98 | if iou > 0.5:
99 | nCorrect = nCorrect + 1
100 |
101 | return nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls
102 |
103 |
104 | class RegionLoss(nn.Module):
105 | def __init__(self, num_classes=0, anchors=[], num_anchors=1):
106 | super(RegionLoss, self).__init__()
107 | self.num_classes = num_classes
108 | self.anchors = anchors
109 | self.num_anchors = num_anchors
110 |         self.anchor_step = len(anchors) // num_anchors  # integer division (used to index/view anchors)
111 | self.coord_scale = 1
112 | self.noobject_scale = 1
113 | self.object_scale = 5
114 | self.class_scale = 1
115 | self.thresh = 0.6
116 | self.seen = 0
117 |
118 | def forward(self, output, target):
119 | # output : BxAs*(4+1+num_classes)*H*W
120 | t0 = time.time()
121 | nB = output.data.size(0)
122 | nA = self.num_anchors
123 | nC = self.num_classes
124 | nH = output.data.size(2)
125 | nW = output.data.size(3)
126 |
127 | output = output.view(nB, nA, (5 + nC), nH, nW)
128 | x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
129 | y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
130 | w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
131 | h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
132 | conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
133 | cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
134 | cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)
135 | t1 = time.time()
136 |
137 | pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
138 | grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
139 | grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
140 | anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
141 | anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
142 | anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
143 | anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
144 | pred_boxes[0] = x.data + grid_x
145 | pred_boxes[1] = y.data + grid_y
146 | pred_boxes[2] = torch.exp(w.data) * anchor_w
147 | pred_boxes[3] = torch.exp(h.data) * anchor_h
148 | pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
149 | t2 = time.time()
150 |
151 | nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
152 | target.data,
153 | self.anchors, nA,
154 | nC, \
155 | nH, nW,
156 | self.noobject_scale,
157 | self.object_scale,
158 | self.thresh,
159 | self.seen)
160 | cls_mask = (cls_mask == 1)
161 |         nProposals = int((conf > 0.25).sum().item())
162 |
163 | tx = Variable(tx.cuda())
164 | ty = Variable(ty.cuda())
165 | tw = Variable(tw.cuda())
166 | th = Variable(th.cuda())
167 | tconf = Variable(tconf.cuda())
168 | tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())
169 |
170 | coord_mask = Variable(coord_mask.cuda())
171 | conf_mask = Variable(conf_mask.cuda().sqrt())
172 | cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
173 | cls = cls[cls_mask].view(-1, nC)
174 |
175 | t3 = time.time()
176 |
177 | loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
178 | loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
179 | loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
180 | loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
181 | loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
182 | loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
183 | loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
184 | t4 = time.time()
185 | if False:
186 | print('-----------------------------------')
187 | print(' activation : %f' % (t1 - t0))
188 | print(' create pred_boxes : %f' % (t2 - t1))
189 | print(' build targets : %f' % (t3 - t2))
190 | print(' create loss : %f' % (t4 - t3))
191 | print(' total : %f' % (t4 - t0))
192 |         print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
193 |             self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
194 |             loss_conf.item(), loss_cls.item(), loss.item()))
195 | return loss
196 |
--------------------------------------------------------------------------------
/tool/torch_utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import time
4 | import math
5 | import torch
6 | import numpy as np
7 | from torch.autograd import Variable
8 |
9 | import itertools
10 | import struct # get_image_size
11 | import imghdr # get_image_size
12 |
13 | from tool import utils
14 |
15 |
16 | def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
17 | if x1y1x2y2:
18 | mx = torch.min(boxes1[0], boxes2[0])
19 | Mx = torch.max(boxes1[2], boxes2[2])
20 | my = torch.min(boxes1[1], boxes2[1])
21 | My = torch.max(boxes1[3], boxes2[3])
22 | w1 = boxes1[2] - boxes1[0]
23 | h1 = boxes1[3] - boxes1[1]
24 | w2 = boxes2[2] - boxes2[0]
25 | h2 = boxes2[3] - boxes2[1]
26 | else:
27 | mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
28 | Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
29 | my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
30 | My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
31 | w1 = boxes1[2]
32 | h1 = boxes1[3]
33 | w2 = boxes2[2]
34 | h2 = boxes2[3]
35 | uw = Mx - mx
36 | uh = My - my
37 | cw = w1 + w2 - uw
38 | ch = h1 + h2 - uh
39 | mask = ((cw <= 0) + (ch <= 0) > 0)
40 | area1 = w1 * h1
41 | area2 = w2 * h2
42 | carea = cw * ch
43 | carea[mask] = 0
44 | uarea = area1 + area2 - carea
45 | return carea / uarea
46 |
47 |
48 | def get_region_boxes(boxes_and_confs):
49 |
50 | # print('Getting boxes from boxes and confs ...')
51 |
52 | boxes_list = []
53 | confs_list = []
54 |
55 | for item in boxes_and_confs:
56 | boxes_list.append(item[0])
57 | confs_list.append(item[1])
58 |
59 | # boxes: [batch, num1 + num2 + num3, 1, 4]
60 | # confs: [batch, num1 + num2 + num3, num_classes]
61 | boxes = torch.cat(boxes_list, dim=1)
62 | confs = torch.cat(confs_list, dim=1)
63 |
64 | return [boxes, confs]
65 |
66 |
67 | def convert2cpu(gpu_matrix):
68 | return torch.FloatTensor(gpu_matrix.size()).copy_(gpu_matrix)
69 |
70 |
71 | def convert2cpu_long(gpu_matrix):
72 | return torch.LongTensor(gpu_matrix.size()).copy_(gpu_matrix)
73 |
74 |
75 |
76 | def do_detect(model, img, conf_thresh, nms_thresh, use_cuda=1):
77 | model.eval()
78 | t0 = time.time()
79 |
80 | if type(img) == np.ndarray and len(img.shape) == 3: # cv2 image
81 | img = torch.from_numpy(img.transpose(2, 0, 1)).float().div(255.0).unsqueeze(0)
82 | elif type(img) == np.ndarray and len(img.shape) == 4:
83 | img = torch.from_numpy(img.transpose(0, 3, 1, 2)).float().div(255.0)
84 | else:
85 |         print("unknown image type")
86 | exit(-1)
87 |
88 | if use_cuda:
89 | img = img.cuda()
90 | img = torch.autograd.Variable(img)
91 |
92 | t1 = time.time()
93 |
94 | output = model(img)
95 |
96 | t2 = time.time()
97 |
98 | print('-----------------------------------')
99 | print(' Preprocess : %f' % (t1 - t0))
100 | print(' Model Inference : %f' % (t2 - t1))
101 | print('-----------------------------------')
102 |
103 | return utils.post_processing(img, conf_thresh, nms_thresh, output)
104 |
105 |
--------------------------------------------------------------------------------
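
A sketch of calling do_detect above with a Darknet-style YOLOv4 model. The Darknet class lives in tool/darknet2pytorch.py (not shown in this listing), so its constructor, load_weights() and width/height attributes are assumptions based on the upstream pytorch-YOLOv4 code, and the cfg/weights paths are placeholders.

    import cv2
    from tool.darknet2pytorch import Darknet
    from tool.torch_utils import do_detect

    model = Darknet('yolov4.cfg')          # placeholder cfg path
    model.load_weights('yolov4.weights')   # placeholder weights path
    model.cuda()

    img = cv2.imread('data/images/bus.jpg')
    sized = cv2.cvtColor(cv2.resize(img, (model.width, model.height)), cv2.COLOR_BGR2RGB)

    boxes = do_detect(model, sized, conf_thresh=0.4, nms_thresh=0.6, use_cuda=1)
    print(len(boxes[0]), 'boxes above the confidence threshold')
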
/tool/utils.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import time
4 | import math
5 | import numpy as np
6 |
7 | import itertools
8 | import struct # get_image_size
9 | import imghdr # get_image_size
10 |
11 |
12 | def sigmoid(x):
13 | return 1.0 / (np.exp(-x) + 1.)
14 |
15 |
16 | def softmax(x):
17 | x = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
18 | x = x / np.expand_dims(x.sum(axis=1), axis=1)
19 | return x
20 |
21 |
22 | def bbox_iou(box1, box2, x1y1x2y2=True):
23 |
24 | # print('iou box1:', box1)
25 | # print('iou box2:', box2)
26 |
27 | if x1y1x2y2:
28 | mx = min(box1[0], box2[0])
29 | Mx = max(box1[2], box2[2])
30 | my = min(box1[1], box2[1])
31 | My = max(box1[3], box2[3])
32 | w1 = box1[2] - box1[0]
33 | h1 = box1[3] - box1[1]
34 | w2 = box2[2] - box2[0]
35 | h2 = box2[3] - box2[1]
36 | else:
37 | w1 = box1[2]
38 | h1 = box1[3]
39 | w2 = box2[2]
40 | h2 = box2[3]
41 |
42 | mx = min(box1[0], box2[0])
43 | Mx = max(box1[0] + w1, box2[0] + w2)
44 | my = min(box1[1], box2[1])
45 | My = max(box1[1] + h1, box2[1] + h2)
46 | uw = Mx - mx
47 | uh = My - my
48 | cw = w1 + w2 - uw
49 | ch = h1 + h2 - uh
50 | carea = 0
51 | if cw <= 0 or ch <= 0:
52 | return 0.0
53 |
54 | area1 = w1 * h1
55 | area2 = w2 * h2
56 | carea = cw * ch
57 | uarea = area1 + area2 - carea
58 | return carea / uarea
59 |
60 |
61 | def nms_cpu(boxes, confs, nms_thresh=0.5, min_mode=False):
62 | # print(boxes.shape)
63 | x1 = boxes[:, 0]
64 | y1 = boxes[:, 1]
65 | x2 = boxes[:, 2]
66 | y2 = boxes[:, 3]
67 |
68 | areas = (x2 - x1) * (y2 - y1)
69 | order = confs.argsort()[::-1]
70 |
71 | keep = []
72 | while order.size > 0:
73 | idx_self = order[0]
74 | idx_other = order[1:]
75 |
76 | keep.append(idx_self)
77 |
78 | xx1 = np.maximum(x1[idx_self], x1[idx_other])
79 | yy1 = np.maximum(y1[idx_self], y1[idx_other])
80 | xx2 = np.minimum(x2[idx_self], x2[idx_other])
81 | yy2 = np.minimum(y2[idx_self], y2[idx_other])
82 |
83 | w = np.maximum(0.0, xx2 - xx1)
84 | h = np.maximum(0.0, yy2 - yy1)
85 | inter = w * h
86 |
87 | if min_mode:
88 | over = inter / np.minimum(areas[order[0]], areas[order[1:]])
89 | else:
90 | over = inter / (areas[order[0]] + areas[order[1:]] - inter)
91 |
92 | inds = np.where(over <= nms_thresh)[0]
93 | order = order[inds + 1]
94 |
95 | return np.array(keep)
96 |
97 |
98 |
99 | def plot_boxes_cv2(img, boxes, savename=None, class_names=None, color=None):
100 | import cv2
101 | img = np.copy(img)
102 | colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)
103 |
104 | def get_color(c, x, max_val):
105 | ratio = float(x) / max_val * 5
106 | i = int(math.floor(ratio))
107 | j = int(math.ceil(ratio))
108 | ratio = ratio - i
109 | r = (1 - ratio) * colors[i][c] + ratio * colors[j][c]
110 | return int(r * 255)
111 |
112 | width = img.shape[1]
113 | height = img.shape[0]
114 | for i in range(len(boxes)):
115 | box = boxes[i]
116 | x1 = int(box[0] * width)
117 | y1 = int(box[1] * height)
118 | x2 = int(box[2] * width)
119 | y2 = int(box[3] * height)
120 |
121 | if color:
122 | rgb = color
123 | else:
124 | rgb = (255, 0, 0)
125 | if len(box) >= 7 and class_names:
126 | cls_conf = box[5]
127 | cls_id = box[6]
128 | print('%s: %f' % (class_names[cls_id], cls_conf))
129 | classes = len(class_names)
130 | offset = cls_id * 123457 % classes
131 | red = get_color(2, offset, classes)
132 | green = get_color(1, offset, classes)
133 | blue = get_color(0, offset, classes)
134 | if color is None:
135 | rgb = (red, green, blue)
136 | img = cv2.putText(img, class_names[cls_id], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1.2, rgb, 1)
137 | img = cv2.rectangle(img, (x1, y1), (x2, y2), rgb, 1)
138 | if savename:
139 | print("save plot results to %s" % savename)
140 | cv2.imwrite(savename, img)
141 | return img
142 |
143 |
144 | def read_truths(lab_path):
145 | if not os.path.exists(lab_path):
146 | return np.array([])
147 | if os.path.getsize(lab_path):
148 | truths = np.loadtxt(lab_path)
149 |         truths = truths.reshape(truths.size // 5, 5)  # to avoid single truth problem
150 | return truths
151 | else:
152 | return np.array([])
153 |
154 |
155 | def load_class_names(namesfile):
156 | class_names = []
157 | with open(namesfile, 'r') as fp:
158 | lines = fp.readlines()
159 | for line in lines:
160 | line = line.rstrip()
161 | class_names.append(line)
162 | return class_names
163 |
164 |
165 |
166 | def post_processing(img, conf_thresh, nms_thresh, output):
167 |
168 | # anchors = [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401]
169 | # num_anchors = 9
170 | # anchor_masks = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
171 | # strides = [8, 16, 32]
172 | # anchor_step = len(anchors) // num_anchors
173 |
174 | # [batch, num, 1, 4]
175 | box_array = output[0]
176 | # [batch, num, num_classes]
177 | confs = output[1]
178 |
179 | t1 = time.time()
180 |
181 | if type(box_array).__name__ != 'ndarray':
182 | box_array = box_array.cpu().detach().numpy()
183 | confs = confs.cpu().detach().numpy()
184 |
185 | num_classes = confs.shape[2]
186 |
187 | # [batch, num, 4]
188 | box_array = box_array[:, :, 0]
189 |
190 | # [batch, num, num_classes] --> [batch, num]
191 | max_conf = np.max(confs, axis=2)
192 | max_id = np.argmax(confs, axis=2)
193 |
194 | t2 = time.time()
195 |
196 | bboxes_batch = []
197 | for i in range(box_array.shape[0]):
198 |
199 | argwhere = max_conf[i] > conf_thresh
200 | l_box_array = box_array[i, argwhere, :]
201 | l_max_conf = max_conf[i, argwhere]
202 | l_max_id = max_id[i, argwhere]
203 |
204 | bboxes = []
205 | # nms for each class
206 | for j in range(num_classes):
207 |
208 | cls_argwhere = l_max_id == j
209 | ll_box_array = l_box_array[cls_argwhere, :]
210 | ll_max_conf = l_max_conf[cls_argwhere]
211 | ll_max_id = l_max_id[cls_argwhere]
212 |
213 | keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)
214 |
215 | if (keep.size > 0):
216 | ll_box_array = ll_box_array[keep, :]
217 | ll_max_conf = ll_max_conf[keep]
218 | ll_max_id = ll_max_id[keep]
219 |
220 | for k in range(ll_box_array.shape[0]):
221 | bboxes.append([ll_box_array[k, 0], ll_box_array[k, 1], ll_box_array[k, 2], ll_box_array[k, 3], ll_max_conf[k], ll_max_conf[k], ll_max_id[k]])
222 |
223 | bboxes_batch.append(bboxes)
224 |
225 | t3 = time.time()
226 |
227 | print('-----------------------------------')
228 | print(' max and argmax : %f' % (t2 - t1))
229 | print(' nms : %f' % (t3 - t2))
230 | print('Post processing total : %f' % (t3 - t1))
231 | print('-----------------------------------')
232 |
233 | return bboxes_batch
234 |
--------------------------------------------------------------------------------
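
A small worked example for nms_cpu above: the second box overlaps the first with an IoU of roughly 0.82, so it is suppressed at nms_thresh=0.5 and only indices 0 and 2 are kept.

    import numpy as np
    from tool.utils import nms_cpu

    boxes = np.array([[0.10, 0.10, 0.40, 0.40],
                      [0.12, 0.11, 0.41, 0.42],
                      [0.60, 0.60, 0.90, 0.90]])
    confs = np.array([0.9, 0.8, 0.7])

    print(nms_cpu(boxes, confs, nms_thresh=0.5))   # -> [0 2]
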
/tool/utils_iou.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | '''
3 | IoU, GIoU, DIoU and CIoU computation between two sets of bounding boxes.
4 | '''
5 | import torch
6 | import os, sys
7 | from torch.nn import functional as F
8 |
9 | import numpy as np
10 | from packaging import version
11 |
12 |
13 | __all__ = [
14 | "bboxes_iou",
15 | "bboxes_giou",
16 | "bboxes_diou",
17 | "bboxes_ciou",
18 | ]
19 |
20 |
21 | if version.parse(torch.__version__) >= version.parse('1.5.0'):
22 | def _true_divide(dividend, divisor):
23 | return torch.true_divide(dividend, divisor)
24 | else:
25 | def _true_divide(dividend, divisor):
26 | return dividend / divisor
27 |
28 | def bboxes_iou(bboxes_a, bboxes_b, fmt='voc', iou_type='iou'):
29 | """Calculate the Intersection of Unions (IoUs) between bounding boxes.
30 | IoU is calculated as a ratio of area of the intersection
31 | and area of the union.
32 |
33 | Args:
34 |         bboxes_a (Tensor): A tensor whose shape is :math:`(N, 4)`.
35 |             :math:`N` is the number of bounding boxes.
36 |             The dtype should be :obj:`torch.float32`.
37 |         bboxes_b (Tensor): A tensor similar to :obj:`bboxes_a`,
38 |             whose shape is :math:`(K, 4)`.
39 |             The dtype should be :obj:`torch.float32`.
40 |     Returns:
41 |         Tensor:
42 |         A tensor whose shape is :math:`(N, K)`. \
43 |         An element at index :math:`(n, k)` contains IoUs between \
44 |         the :math:`n` th bounding box in :obj:`bboxes_a` and the :math:`k` th bounding \
45 |         box in :obj:`bboxes_b`.
46 |
47 | from: https://github.com/chainer/chainercv
48 | """
49 | if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
50 | raise IndexError
51 |
52 | N, K = bboxes_a.shape[0], bboxes_b.shape[0]
53 |
54 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
55 | # top left
56 | tl_intersect = torch.max(
57 | bboxes_a[:, np.newaxis, :2],
58 | bboxes_b[:, :2]
59 | ) # of shape `(N,K,2)`
60 | # bottom right
61 | br_intersect = torch.min(
62 | bboxes_a[:, np.newaxis, 2:],
63 | bboxes_b[:, 2:]
64 | )
65 | bb_a = bboxes_a[:, 2:] - bboxes_a[:, :2]
66 | bb_b = bboxes_b[:, 2:] - bboxes_b[:, :2]
67 | # bb_* can also be seen vectors representing box_width, box_height
68 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h
69 | # top left
70 | tl_intersect = torch.max(
71 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
72 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
73 | )
74 | # bottom right
75 | br_intersect = torch.min(
76 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
77 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
78 | )
79 | bb_a = bboxes_a[:, 2:]
80 | bb_b = bboxes_b[:, 2:]
81 | elif fmt.lower() == 'coco': # xmin, ymin, w, h
82 | # top left
83 | tl_intersect = torch.max(
84 | bboxes_a[:, np.newaxis, :2],
85 | bboxes_b[:, :2]
86 | )
87 | # bottom right
88 | br_intersect = torch.min(
89 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
90 | bboxes_b[:, :2] + bboxes_b[:, 2:]
91 | )
92 | bb_a = bboxes_a[:, 2:]
93 | bb_b = bboxes_b[:, 2:]
94 |
95 | area_a = torch.prod(bb_a, 1)
96 | area_b = torch.prod(bb_b, 1)
97 |
98 | # torch.prod(input, dim, keepdim=False, dtype=None) → Tensor
99 | # Returns the product of each row of the input tensor in the given dimension dim
100 |     # if tl, br does not form a non-degenerate square, then the corresponding element in the prod would be 0
101 | en = (tl_intersect < br_intersect).type(tl_intersect.type()).prod(dim=2) # shape `(N,K,2)` ---> shape `(N,K)`
102 |
103 | area_intersect = torch.prod(br_intersect - tl_intersect, 2) * en # * ((tl < br).all())
104 | area_union = (area_a[:, np.newaxis] + area_b - area_intersect)
105 |
106 | iou = _true_divide(area_intersect, area_union)
107 |
108 | if iou_type.lower() == 'iou':
109 | return iou
110 |
111 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
112 | # top left
113 | tl_union = torch.min(
114 | bboxes_a[:, np.newaxis, :2],
115 | bboxes_b[:, :2]
116 | ) # of shape `(N,K,2)`
117 | # bottom right
118 | br_union = torch.max(
119 | bboxes_a[:, np.newaxis, 2:],
120 | bboxes_b[:, 2:]
121 | )
122 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h
123 | # top left
124 | tl_union = torch.min(
125 | bboxes_a[:, np.newaxis, :2] - bboxes_a[:, np.newaxis, 2:] / 2,
126 | bboxes_b[:, :2] - bboxes_b[:, 2:] / 2
127 | )
128 | # bottom right
129 | br_union = torch.max(
130 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:] / 2,
131 | bboxes_b[:, :2] + bboxes_b[:, 2:] / 2
132 | )
133 | elif fmt.lower() == 'coco': # xmin, ymin, w, h
134 | # top left
135 | tl_union = torch.min(
136 | bboxes_a[:, np.newaxis, :2],
137 | bboxes_b[:, :2]
138 | )
139 | # bottom right
140 | br_union = torch.max(
141 | bboxes_a[:, np.newaxis, :2] + bboxes_a[:, np.newaxis, 2:],
142 | bboxes_b[:, :2] + bboxes_b[:, 2:]
143 | )
144 |
145 | # c for covering, of shape `(N,K,2)`
146 |     # the last dim is box width, box height
147 | bboxes_c = br_union - tl_union
148 |
149 | area_covering = torch.prod(bboxes_c, 2) # shape `(N,K)`
150 |
151 | giou = iou - _true_divide(area_covering - area_union, area_covering)
152 |
153 | if iou_type.lower() == 'giou':
154 | return giou
155 |
156 | if fmt.lower() == 'voc': # xmin, ymin, xmax, ymax
157 | centre_a = (bboxes_a[..., 2 :] + bboxes_a[..., : 2]) / 2
158 | centre_b = (bboxes_b[..., 2 :] + bboxes_b[..., : 2]) / 2
159 | elif fmt.lower() == 'yolo': # xcen, ycen, w, h
160 | centre_a = bboxes_a[..., : 2]
161 | centre_b = bboxes_b[..., : 2]
162 | elif fmt.lower() == 'coco': # xmin, ymin, w, h
163 | centre_a = bboxes_a[..., 2 :] + bboxes_a[..., : 2]/2
164 | centre_b = bboxes_b[..., 2 :] + bboxes_b[..., : 2]/2
165 |
166 | centre_dist = torch.norm(centre_a[:, np.newaxis] - centre_b, p='fro', dim=2)
167 | diag_len = torch.norm(bboxes_c, p='fro', dim=2)
168 |
169 | diou = iou - _true_divide(centre_dist.pow(2), diag_len.pow(2))
170 |
171 | if iou_type.lower() == 'diou':
172 | return diou
173 |
174 | """ the legacy custom cosine similarity:
175 |
176 | # bb_a of shape `(N,2)`, bb_b of shape `(K,2)`
177 | v = torch.einsum('nm,km->nk', bb_a, bb_b)
178 | v = _true_divide(v, (torch.norm(bb_a, p='fro', dim=1)[:,np.newaxis] * torch.norm(bb_b, p='fro', dim=1)))
179 | # avoid nan for torch.acos near \pm 1
180 | # https://github.com/pytorch/pytorch/issues/8069
181 | eps = 1e-7
182 | v = torch.clamp(v, -1+eps, 1-eps)
183 | """
184 | v = F.cosine_similarity(bb_a[:,np.newaxis,:], bb_b, dim=-1)
185 | v = (_true_divide(2*torch.acos(v), np.pi)).pow(2)
186 | with torch.no_grad():
187 | alpha = (_true_divide(v, 1-iou+v)) * ((iou>=0.5).type(iou.type()))
188 |
189 | ciou = diou - alpha * v
190 |
191 | if iou_type.lower() == 'ciou':
192 | return ciou
193 |
194 |
195 | def bboxes_giou(bboxes_a, bboxes_b, fmt='voc'):
196 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'giou')
197 |
198 |
199 | def bboxes_diou(bboxes_a, bboxes_b, fmt='voc'):
200 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'diou')
201 |
202 |
203 | def bboxes_ciou(bboxes_a, bboxes_b, fmt='voc'):
204 | return bboxes_iou(bboxes_a, bboxes_b, fmt, 'ciou')
205 |
--------------------------------------------------------------------------------
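
A quick usage sketch for bboxes_iou and its GIoU variant; inputs are torch tensors in 'voc' (xmin, ymin, xmax, ymax) format and the result has shape (N, K).

    import torch
    from tool.utils_iou import bboxes_iou, bboxes_giou

    a = torch.tensor([[0., 0., 10., 10.]])
    b = torch.tensor([[0., 0., 10., 10.],
                      [5., 5., 15., 15.]])

    print(bboxes_iou(a, b, fmt='voc'))    # tensor([[1.0000, 0.1429]])
    print(bboxes_giou(a, b, fmt='voc'))   # GIoU additionally penalises enclosing-box slack
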
/tools/freeze_model.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import argparse
3 | import tensorflow as tf
4 | import tensorflow.contrib.slim as slim
5 |
6 |
7 | def _batch_norm_fn(x, scope=None):
8 | if scope is None:
9 | scope = tf.get_variable_scope().name + "/bn"
10 | return slim.batch_norm(x, scope=scope)
11 |
12 |
13 | def create_link(
14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu,
15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
16 | regularizer=None, is_first=False, summarize_activations=True):
17 | if is_first:
18 | network = incoming
19 | else:
20 | network = _batch_norm_fn(incoming, scope=scope + "/bn")
21 | network = nonlinearity(network)
22 | if summarize_activations:
23 | tf.summary.histogram(scope+"/activations", network)
24 |
25 | pre_block_network = network
26 | post_block_network = network_builder(pre_block_network, scope)
27 |
28 | incoming_dim = pre_block_network.get_shape().as_list()[-1]
29 | outgoing_dim = post_block_network.get_shape().as_list()[-1]
30 | if incoming_dim != outgoing_dim:
31 | assert outgoing_dim == 2 * incoming_dim, \
32 |             "%d != %d" % (outgoing_dim, 2 * incoming_dim)
33 | projection = slim.conv2d(
34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None,
35 | scope=scope+"/projection", weights_initializer=weights_initializer,
36 | biases_initializer=None, weights_regularizer=regularizer)
37 | network = projection + post_block_network
38 | else:
39 | network = incoming + post_block_network
40 | return network
41 |
42 |
43 | def create_inner_block(
44 | incoming, scope, nonlinearity=tf.nn.elu,
45 |         weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
46 | bias_initializer=tf.zeros_initializer(), regularizer=None,
47 | increase_dim=False, summarize_activations=True):
48 | n = incoming.get_shape().as_list()[-1]
49 | stride = 1
50 | if increase_dim:
51 | n *= 2
52 | stride = 2
53 |
54 | incoming = slim.conv2d(
55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
57 | biases_initializer=bias_initializer, weights_regularizer=regularizer,
58 | scope=scope + "/1")
59 | if summarize_activations:
60 | tf.summary.histogram(incoming.name + "/activations", incoming)
61 |
62 | incoming = slim.dropout(incoming, keep_prob=0.6)
63 |
64 | incoming = slim.conv2d(
65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
66 | normalizer_fn=None, weights_initializer=weights_initializer,
67 | biases_initializer=bias_initializer, weights_regularizer=regularizer,
68 | scope=scope + "/2")
69 | return incoming
70 |
71 |
72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu,
73 |                    weights_initializer=tf.truncated_normal_initializer(1e-3),
74 | bias_initializer=tf.zeros_initializer(), regularizer=None,
75 | increase_dim=False, is_first=False,
76 | summarize_activations=True):
77 |
78 | def network_builder(x, s):
79 | return create_inner_block(
80 | x, s, nonlinearity, weights_initializer, bias_initializer,
81 | regularizer, increase_dim, summarize_activations)
82 |
83 | return create_link(
84 | incoming, network_builder, scope, nonlinearity, weights_initializer,
85 | regularizer, is_first, summarize_activations)
86 |
87 |
88 | def _create_network(incoming, reuse=None, weight_decay=1e-8):
89 | nonlinearity = tf.nn.elu
90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
91 | conv_bias_init = tf.zeros_initializer()
92 | conv_regularizer = slim.l2_regularizer(weight_decay)
93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
94 | fc_bias_init = tf.zeros_initializer()
95 | fc_regularizer = slim.l2_regularizer(weight_decay)
96 |
97 | def batch_norm_fn(x):
98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")
99 |
100 | network = incoming
101 | network = slim.conv2d(
102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1",
104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
105 | weights_regularizer=conv_regularizer)
106 | network = slim.conv2d(
107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2",
109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
110 | weights_regularizer=conv_regularizer)
111 |
112 | # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
113 | # architecture in Table 1 of the paper. Information on how this affects
114 | # performance on MOT 16 training sequences can be found in
115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10
116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")
117 |
118 | network = residual_block(
119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
120 | conv_regularizer, increase_dim=False, is_first=True)
121 | network = residual_block(
122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
123 | conv_regularizer, increase_dim=False)
124 |
125 | network = residual_block(
126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
127 | conv_regularizer, increase_dim=True)
128 | network = residual_block(
129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
130 | conv_regularizer, increase_dim=False)
131 |
132 | network = residual_block(
133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
134 | conv_regularizer, increase_dim=True)
135 | network = residual_block(
136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
137 | conv_regularizer, increase_dim=False)
138 |
139 | feature_dim = network.get_shape().as_list()[-1]
140 | network = slim.flatten(network)
141 |
142 | network = slim.dropout(network, keep_prob=0.6)
143 | network = slim.fully_connected(
144 | network, feature_dim, activation_fn=nonlinearity,
145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
146 | scope="fc1", weights_initializer=fc_weight_init,
147 | biases_initializer=fc_bias_init)
148 |
149 | features = network
150 |
151 | # Features in rows, normalize axis 1.
152 | features = slim.batch_norm(features, scope="ball", reuse=reuse)
153 | feature_norm = tf.sqrt(
154 | tf.constant(1e-8, tf.float32) +
155 | tf.reduce_sum(tf.square(features), [1], keepdims=True))
156 | features = features / feature_norm
157 | return features, None
158 |
159 |
160 | def _network_factory(weight_decay=1e-8):
161 |
162 | def factory_fn(image, reuse):
163 | with slim.arg_scope([slim.batch_norm, slim.dropout],
164 | is_training=False):
165 | with slim.arg_scope([slim.conv2d, slim.fully_connected,
166 | slim.batch_norm, slim.layer_norm],
167 | reuse=reuse):
168 | features, logits = _create_network(
169 | image, reuse=reuse, weight_decay=weight_decay)
170 | return features, logits
171 |
172 | return factory_fn
173 |
174 |
175 | def _preprocess(image):
176 | image = image[:, :, ::-1] # BGR to RGB
177 | return image
178 |
179 |
180 | def parse_args():
181 | """Parse command line arguments.
182 | """
183 | parser = argparse.ArgumentParser(description="Freeze old model")
184 | parser.add_argument(
185 | "--checkpoint_in",
186 | default="resources/networks/mars-small128.ckpt-68577",
187 | help="Path to checkpoint file")
188 | parser.add_argument(
189 | "--graphdef_out",
190 | default="resources/networks/mars-small128.pb")
191 | return parser.parse_args()
192 |
193 |
194 | def main():
195 | args = parse_args()
196 |
197 | with tf.Session(graph=tf.Graph()) as session:
198 | input_var = tf.placeholder(
199 | tf.uint8, (None, 128, 64, 3), name="images")
200 | image_var = tf.map_fn(
201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
202 | back_prop=False)
203 |
204 | factory_fn = _network_factory()
205 | features, _ = factory_fn(image_var, reuse=None)
206 | features = tf.identity(features, name="features")
207 |
208 | saver = tf.train.Saver(slim.get_variables_to_restore())
209 | saver.restore(session, args.checkpoint_in)
210 |
211 | output_graph_def = tf.graph_util.convert_variables_to_constants(
212 | session, tf.get_default_graph().as_graph_def(),
213 | [features.name.split(":")[0]])
214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle:
215 | file_handle.write(output_graph_def.SerializeToString())
216 |
217 |
218 | if __name__ == "__main__":
219 | main()
220 |
--------------------------------------------------------------------------------
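After tools/freeze_model.py has written the frozen graph, the export can be sanity-checked by loading the .pb and querying the "images" and "features" tensors named in main() above. A minimal sketch, assuming the same TF1-compatible environment (tensorflow.compat.v1) that tools/generate_detections.py relies on and the default output path:

    import numpy as np
    import tensorflow.compat.v1 as tf

    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile("resources/networks/mars-small128.pb", "rb") as f:
            graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name="net")  # all ops are imported under the "net/" scope

    with tf.Session(graph=graph) as sess:
        images = graph.get_tensor_by_name("net/images:0")
        features = graph.get_tensor_by_name("net/features:0")
        # One dummy 128x64 BGR patch in, one 128-dimensional appearance descriptor out.
        out = sess.run(features, feed_dict={images: np.zeros((1, 128, 64, 3), np.uint8)})
        print(out.shape)  # expected: (1, 128)
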
/tools/generate_clip_detections.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import os
3 | import errno
4 | import argparse
5 | import numpy as np
6 | import cv2
7 | import torch
8 | from PIL import Image
9 |
10 |
11 | def _run_in_batches(f, data_dict, out, batch_size):
12 | data_len = len(out)
13 | num_batches = int(data_len / batch_size)
14 |
15 | s, e = 0, 0
16 | for i in range(num_batches):
17 | s, e = i * batch_size, (i + 1) * batch_size
18 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
19 | out[s:e] = f(batch_data_dict)
20 | if e < len(out):
21 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
22 | out[e:] = f(batch_data_dict)
23 |
24 |
25 | def extract_image_patch(image, bbox, patch_shape=None):
26 | """Extract image patch from bounding box.
27 |
28 | Parameters
29 | ----------
30 | image : ndarray
31 | The full image.
32 | bbox : array_like
33 | The bounding box in format (x, y, width, height).
34 | patch_shape : Optional[array_like]
35 | This parameter can be used to enforce a desired patch shape
36 | (height, width). First, the `bbox` is adapted to the aspect ratio
37 | of the patch shape, then it is clipped at the image boundaries.
38 | If None, the shape is computed from :arg:`bbox`.
39 |
40 | Returns
41 | -------
42 | ndarray | NoneType
43 | An image patch showing the :arg:`bbox`, optionally reshaped to
44 | :arg:`patch_shape`.
45 | Returns None if the bounding box is empty or fully outside of the image
46 | boundaries.
47 |
48 | """
49 | bbox = np.array(bbox.cpu())
50 | if patch_shape is not None:
51 | # correct aspect ratio to patch shape
52 | target_aspect = float(patch_shape[1]) / patch_shape[0]
53 | new_width = target_aspect * bbox[3]
54 | bbox[0] -= (new_width - bbox[2]) / 2
55 | bbox[2] = new_width
56 |
57 | # convert to top left, bottom right
58 | bbox[2:] += bbox[:2]
59 |     bbox = bbox.astype(int)  # np.int was removed in newer NumPy; built-in int is equivalent
60 |
61 | # clip at image boundaries
62 | bbox[:2] = np.maximum(0, bbox[:2])
63 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
64 | if np.any(bbox[:2] >= bbox[2:]):
65 | return None
66 | sx, sy, ex, ey = bbox
67 | image = image[sy:ey, sx:ex]
68 |
69 | #image = cv2.resize(image, tuple(patch_shape[::-1]))
70 | return image
71 |
72 |
73 | class ImageEncoder(object):
74 |
75 | def __init__(self, model, transform, device):
76 |
77 |
78 | self.model = model
79 | self.transform = transform
80 | self.device = device
81 |
82 | def __call__(self, data_x, batch_size=32):
83 | out = []
84 | #data_x = [i for i in data_x if i is not None]
85 |
86 | #print("[ZSOT ImageEncoder] num_none: {}".format(len(num_none)))
87 | for patch in range(len(data_x)):
88 | if self.device == "cpu":
89 | img = self.transform(Image.fromarray(data_x[patch]))
90 | else:
91 | img = self.transform(Image.fromarray(data_x[patch])).cuda()
92 | out.append(img)
93 |
94 | features = self.model.encode_image(torch.stack(out)).cpu().numpy()
95 | for idx, i in enumerate(features):
96 | if np.isnan(i[0]):
97 | print("nan values")
98 | # features[idx] = np.zeros(512)
99 | # cv2.imshow("image", data_x[idx])
100 | # cv2.waitKey(0)
101 |
102 | return features
103 |
104 |
105 | def create_box_encoder(model, transform, batch_size=32, device="cpu"):
106 | image_encoder = ImageEncoder(model, transform, device)
107 |
108 | def encoder(image, boxes):
109 | image_patches = []
110 | for box in boxes:
111 | #print("extracting box {} from image {}".format(box, image.shape))
112 | patch = extract_image_patch(image, box)
113 |
114 | if patch is None:
115 | print("WARNING: Failed to extract image patch: %s." % str(box))
116 | patch = np.random.uniform(
117 | 0., 255., image.shape).astype(np.uint8)
118 | image_patches.append(patch)
119 | #image_patches = np.array(image_patches)
120 | return image_encoder(image_patches, batch_size)
121 |
122 | return encoder
123 |
124 |
125 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
126 | """Generate detections with features.
127 |
128 | Parameters
129 | ----------
130 | encoder : Callable[image, ndarray] -> ndarray
131 | The encoder function takes as input a BGR color image and a matrix of
132 | bounding boxes in format `(x, y, w, h)` and returns a matrix of
133 | corresponding feature vectors.
134 | mot_dir : str
135 | Path to the MOTChallenge directory (can be either train or test).
136 | output_dir
137 | Path to the output directory. Will be created if it does not exist.
138 | detection_dir
139 | Path to custom detections. The directory structure should be the default
140 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
141 | standard MOTChallenge detections.
142 |
143 | """
144 | if detection_dir is None:
145 | detection_dir = mot_dir
146 | try:
147 | os.makedirs(output_dir)
148 | except OSError as exception:
149 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
150 | pass
151 | else:
152 | raise ValueError(
153 |                 "Failed to create output directory '%s'" % output_dir)
154 |
155 | for sequence in os.listdir(mot_dir):
156 | print("Processing %s" % sequence)
157 | sequence_dir = os.path.join(mot_dir, sequence)
158 |
159 | image_dir = os.path.join(sequence_dir, "img1")
160 | image_filenames = {
161 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
162 | for f in os.listdir(image_dir)}
163 |
164 | detection_file = os.path.join(
165 | detection_dir, sequence, "det/det.txt")
166 | detections_in = np.loadtxt(detection_file, delimiter=',')
167 | detections_out = []
168 |
169 |         frame_indices = detections_in[:, 0].astype(int)
170 |         min_frame_idx = frame_indices.astype(int).min()
171 |         max_frame_idx = frame_indices.astype(int).max()
172 | for frame_idx in range(min_frame_idx, max_frame_idx + 1):
173 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
174 | mask = frame_indices == frame_idx
175 | rows = detections_in[mask]
176 |
177 | if frame_idx not in image_filenames:
178 |                 print("WARNING: could not find image for frame %d" % frame_idx)
179 | continue
180 | bgr_image = cv2.imread(
181 | image_filenames[frame_idx], cv2.IMREAD_COLOR)
182 | features = encoder(bgr_image, rows[:, 2:6].copy())
183 | detections_out += [np.r_[(row, feature)] for row, feature
184 | in zip(rows, features)]
185 |
186 | output_filename = os.path.join(output_dir, "%s.npy" % sequence)
187 | np.save(
188 | output_filename, np.asarray(detections_out), allow_pickle=False)
189 |
190 |
191 | def parse_args():
192 | """Parse command line arguments.
193 | """
194 | parser = argparse.ArgumentParser(description="Re-ID feature extractor")
195 | parser.add_argument(
196 | "--model",
197 | default="resources/networks/mars-small128.pb",
198 |         help="Path to frozen inference graph protobuf.")
199 | parser.add_argument(
200 | "--mot_dir", help="Path to MOTChallenge directory (train or test)",
201 | required=True)
202 | parser.add_argument(
203 | "--detection_dir", help="Path to custom detections. Defaults to "
204 |         "standard MOT detections. Directory structure should be the default "
205 | "MOTChallenge structure: [sequence]/det/det.txt", default=None)
206 | parser.add_argument(
207 | "--output_dir", help="Output directory. Will be created if it does not"
208 | " exist.", default="detections")
209 | return parser.parse_args()
210 |
211 |
212 | def main():
213 | args = parse_args()
214 | encoder = create_box_encoder(args.model, batch_size=32)
215 | generate_detections(encoder, args.mot_dir, args.output_dir,
216 | args.detection_dir)
217 |
218 |
219 | if __name__ == "__main__":
220 | main()
221 |
--------------------------------------------------------------------------------
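One caveat about the file above: create_box_encoder expects an already-loaded CLIP model plus its preprocessing transform, while main() still passes the --model path inherited from the TensorFlow script, so this file will not run correctly as a standalone CLI. A minimal sketch of how the encoder is wired up instead, assuming the openai CLIP package (imported as clip) is installed; clip_object_tracker.py (not shown in this section) presumably does the equivalent wiring:

    import clip
    import cv2
    import torch

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, transform = clip.load("ViT-B/32", device=device)   # CLIP model + its image transform

    encoder = create_box_encoder(model, transform, batch_size=32, device=device)

    frame = cv2.imread("data/images/bus.jpg")                  # BGR frame from the repo's sample data
    boxes = torch.tensor([[50., 100., 200., 400.]])            # one (x, y, w, h) detection box
    with torch.no_grad():                                      # the encoder itself does not disable grad
        features = encoder(frame, boxes)
    print(features.shape)                                      # (1, 512) for ViT-B/32
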
/tools/generate_detections.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import os
3 | import errno
4 | import argparse
5 | import numpy as np
6 | import cv2
7 | import tensorflow.compat.v1 as tf
8 |
9 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
10 | if len(physical_devices) > 0:
11 | tf.config.experimental.set_memory_growth(physical_devices[0], True)
12 |
13 | def _run_in_batches(f, data_dict, out, batch_size):
14 | data_len = len(out)
15 | num_batches = int(data_len / batch_size)
16 |
17 | s, e = 0, 0
18 | for i in range(num_batches):
19 | s, e = i * batch_size, (i + 1) * batch_size
20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
21 | out[s:e] = f(batch_data_dict)
22 | if e < len(out):
23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
24 | out[e:] = f(batch_data_dict)
25 |
26 |
27 | def extract_image_patch(image, bbox, patch_shape):
28 | """Extract image patch from bounding box.
29 |
30 | Parameters
31 | ----------
32 | image : ndarray
33 | The full image.
34 | bbox : array_like
35 | The bounding box in format (x, y, width, height).
36 | patch_shape : Optional[array_like]
37 | This parameter can be used to enforce a desired patch shape
38 | (height, width). First, the `bbox` is adapted to the aspect ratio
39 | of the patch shape, then it is clipped at the image boundaries.
40 | If None, the shape is computed from :arg:`bbox`.
41 |
42 | Returns
43 | -------
44 | ndarray | NoneType
45 | An image patch showing the :arg:`bbox`, optionally reshaped to
46 | :arg:`patch_shape`.
47 | Returns None if the bounding box is empty or fully outside of the image
48 | boundaries.
49 |
50 | """
51 | bbox = np.array(bbox)
52 | if patch_shape is not None:
53 | # correct aspect ratio to patch shape
54 | target_aspect = float(patch_shape[1]) / patch_shape[0]
55 | new_width = target_aspect * bbox[3]
56 | bbox[0] -= (new_width - bbox[2]) / 2
57 | bbox[2] = new_width
58 |
59 | # convert to top left, bottom right
60 | bbox[2:] += bbox[:2]
61 |     bbox = bbox.astype(int)  # np.int was removed in newer NumPy; built-in int is equivalent
62 |
63 | # clip at image boundaries
64 | bbox[:2] = np.maximum(0, bbox[:2])
65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
66 | if np.any(bbox[:2] >= bbox[2:]):
67 | return None
68 | sx, sy, ex, ey = bbox
69 | image = image[sy:ey, sx:ex]
70 | image = cv2.resize(image, tuple(patch_shape[::-1]))
71 | return image
72 |
73 |
74 | class ImageEncoder(object):
75 |
76 | def __init__(self, checkpoint_filename, input_name="images",
77 | output_name="features"):
78 | self.session = tf.Session()
79 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
80 | graph_def = tf.GraphDef()
81 | graph_def.ParseFromString(file_handle.read())
82 | tf.import_graph_def(graph_def, name="net")
83 | self.input_var = tf.get_default_graph().get_tensor_by_name(
84 |             "net/%s:0" % input_name)  # graph was imported under the "net" scope above
85 |         self.output_var = tf.get_default_graph().get_tensor_by_name(
86 |             "net/%s:0" % output_name)
87 |
88 | assert len(self.output_var.get_shape()) == 2
89 | assert len(self.input_var.get_shape()) == 4
90 | self.feature_dim = self.output_var.get_shape().as_list()[-1]
91 | self.image_shape = self.input_var.get_shape().as_list()[1:]
92 |
93 | def __call__(self, data_x, batch_size=32):
94 | out = np.zeros((len(data_x), self.feature_dim), np.float32)
95 | _run_in_batches(
96 | lambda x: self.session.run(self.output_var, feed_dict=x),
97 | {self.input_var: data_x}, out, batch_size)
98 | return out
99 |
100 |
101 | def create_box_encoder(model_filename, input_name="images",
102 | output_name="features", batch_size=32):
103 | image_encoder = ImageEncoder(model_filename, input_name, output_name)
104 | image_shape = image_encoder.image_shape
105 |
106 | def encoder(image, boxes):
107 | image_patches = []
108 | for box in boxes:
109 | patch = extract_image_patch(image, box, image_shape[:2])
110 | if patch is None:
111 | print("WARNING: Failed to extract image patch: %s." % str(box))
112 | patch = np.random.uniform(
113 | 0., 255., image_shape).astype(np.uint8)
114 | image_patches.append(patch)
115 | image_patches = np.asarray(image_patches)
116 | return image_encoder(image_patches, batch_size)
117 |
118 | return encoder
119 |
120 |
121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
122 | """Generate detections with features.
123 |
124 | Parameters
125 | ----------
126 | encoder : Callable[image, ndarray] -> ndarray
127 | The encoder function takes as input a BGR color image and a matrix of
128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of
129 | corresponding feature vectors.
130 | mot_dir : str
131 | Path to the MOTChallenge directory (can be either train or test).
132 | output_dir
133 | Path to the output directory. Will be created if it does not exist.
134 | detection_dir
135 | Path to custom detections. The directory structure should be the default
136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
137 | standard MOTChallenge detections.
138 |
139 | """
140 | if detection_dir is None:
141 | detection_dir = mot_dir
142 | try:
143 | os.makedirs(output_dir)
144 | except OSError as exception:
145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
146 | pass
147 | else:
148 | raise ValueError(
149 |                 "Failed to create output directory '%s'" % output_dir)
150 |
151 | for sequence in os.listdir(mot_dir):
152 | print("Processing %s" % sequence)
153 | sequence_dir = os.path.join(mot_dir, sequence)
154 |
155 | image_dir = os.path.join(sequence_dir, "img1")
156 | image_filenames = {
157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
158 | for f in os.listdir(image_dir)}
159 |
160 | detection_file = os.path.join(
161 | detection_dir, sequence, "det/det.txt")
162 | detections_in = np.loadtxt(detection_file, delimiter=',')
163 | detections_out = []
164 |
165 |         frame_indices = detections_in[:, 0].astype(int)
166 |         min_frame_idx = frame_indices.astype(int).min()
167 |         max_frame_idx = frame_indices.astype(int).max()
168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1):
169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
170 | mask = frame_indices == frame_idx
171 | rows = detections_in[mask]
172 |
173 | if frame_idx not in image_filenames:
174 |                 print("WARNING: could not find image for frame %d" % frame_idx)
175 | continue
176 | bgr_image = cv2.imread(
177 | image_filenames[frame_idx], cv2.IMREAD_COLOR)
178 | features = encoder(bgr_image, rows[:, 2:6].copy())
179 | detections_out += [np.r_[(row, feature)] for row, feature
180 | in zip(rows, features)]
181 |
182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence)
183 | np.save(
184 | output_filename, np.asarray(detections_out), allow_pickle=False)
185 |
186 |
187 | def parse_args():
188 | """Parse command line arguments.
189 | """
190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor")
191 | parser.add_argument(
192 | "--model",
193 | default="resources/networks/mars-small128.pb",
194 |         help="Path to frozen inference graph protobuf.")
195 | parser.add_argument(
196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)",
197 | required=True)
198 | parser.add_argument(
199 | "--detection_dir", help="Path to custom detections. Defaults to "
200 |         "standard MOT detections. Directory structure should be the default "
201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None)
202 | parser.add_argument(
203 | "--output_dir", help="Output directory. Will be created if it does not"
204 | " exist.", default="detections")
205 | return parser.parse_args()
206 |
207 |
208 | def main():
209 | args = parse_args()
210 | encoder = create_box_encoder(args.model, batch_size=32)
211 | generate_detections(encoder, args.mot_dir, args.output_dir,
212 | args.detection_dir)
213 |
214 |
215 | if __name__ == "__main__":
216 | main()
217 |
--------------------------------------------------------------------------------
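For reference, a minimal sketch of using the TensorFlow-based encoder above on a single frame rather than a full MOTChallenge directory; it assumes the same TF1-compatible environment as the script itself, the repo's bundled model_data/mars-small128.pb, and an illustrative box:

    import cv2
    import numpy as np

    encoder = create_box_encoder("model_data/mars-small128.pb", batch_size=32)

    frame = cv2.imread("data/images/zidane.jpg")    # BGR image, as the encoder expects
    boxes = np.array([[100., 50., 80., 160.]])      # one (x, y, w, h) box
    features = encoder(frame, boxes)
    print(features.shape)                           # (1, 128): one appearance descriptor per box
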
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/__init__.py
--------------------------------------------------------------------------------
/utils/activations.py:
--------------------------------------------------------------------------------
1 | # Activation functions
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | # SiLU https://arxiv.org/pdf/1905.02244.pdf ----------------------------------------------------------------------------
9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU()
10 | @staticmethod
11 | def forward(x):
12 | return x * torch.sigmoid(x)
13 |
14 |
15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16 | @staticmethod
17 | def forward(x):
18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML
19 | return x * F.hardtanh(x + 3, 0., 6.) / 6. # for torchscript, CoreML and ONNX
20 |
21 |
22 | class MemoryEfficientSwish(nn.Module):
23 | class F(torch.autograd.Function):
24 | @staticmethod
25 | def forward(ctx, x):
26 | ctx.save_for_backward(x)
27 | return x * torch.sigmoid(x)
28 |
29 | @staticmethod
30 | def backward(ctx, grad_output):
31 | x = ctx.saved_tensors[0]
32 | sx = torch.sigmoid(x)
33 | return grad_output * (sx * (1 + x * (1 - sx)))
34 |
35 | def forward(self, x):
36 | return self.F.apply(x)
37 |
38 |
39 | # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40 | class Mish(nn.Module):
41 | @staticmethod
42 | def forward(x):
43 | return x * F.softplus(x).tanh()
44 |
45 |
46 | class MemoryEfficientMish(nn.Module):
47 | class F(torch.autograd.Function):
48 | @staticmethod
49 | def forward(ctx, x):
50 | ctx.save_for_backward(x)
51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52 |
53 | @staticmethod
54 | def backward(ctx, grad_output):
55 | x = ctx.saved_tensors[0]
56 | sx = torch.sigmoid(x)
57 | fx = F.softplus(x).tanh()
58 | return grad_output * (fx + x * sx * (1 - fx * fx))
59 |
60 | def forward(self, x):
61 | return self.F.apply(x)
62 |
63 |
64 | # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65 | class FReLU(nn.Module):
66 | def __init__(self, c1, k=3): # ch_in, kernel
67 | super().__init__()
68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
69 | self.bn = nn.BatchNorm2d(c1)
70 |
71 | def forward(self, x):
72 | return torch.max(x, self.bn(self.conv(x)))
73 |
--------------------------------------------------------------------------------
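The SiLU and Hardswish classes above exist so that exported graphs (TorchScript, CoreML, ONNX) avoid the native activations those backends handle poorly. A minimal sketch of swapping them in before export; the helper name is invented here, and models/export.py is the natural place in this repo for an equivalent replacement:

    import torch.nn as nn
    from utils.activations import SiLU, Hardswish

    def make_export_friendly(model: nn.Module) -> nn.Module:
        # Replace native activation submodules in place with the export-friendly versions above.
        for m in model.modules():
            for name, child in m.named_children():
                if isinstance(child, nn.SiLU):
                    setattr(m, name, SiLU())
                elif isinstance(child, nn.Hardswish):
                    setattr(m, name, Hardswish())
        return model
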
/utils/autoanchor.py:
--------------------------------------------------------------------------------
1 | # Auto-anchor utils
2 |
3 | import numpy as np
4 | import torch
5 | import yaml
6 | from scipy.cluster.vq import kmeans
7 | from tqdm import tqdm
8 |
9 |
10 | def check_anchor_order(m):
11 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
12 | a = m.anchor_grid.prod(-1).view(-1) # anchor area
13 | da = a[-1] - a[0] # delta a
14 | ds = m.stride[-1] - m.stride[0] # delta s
15 |     if da.sign() != ds.sign():  # anchor and stride orders disagree, flip anchors to match
16 | print('Reversing anchor order')
17 | m.anchors[:] = m.anchors.flip(0)
18 | m.anchor_grid[:] = m.anchor_grid.flip(0)
19 |
20 |
21 | def check_anchors(dataset, model, thr=4.0, imgsz=640):
22 | # Check anchor fit to data, recompute if necessary
23 | print('\nAnalyzing anchors... ', end='')
24 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
25 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
26 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
27 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh
28 |
29 | def metric(k): # compute metric
30 | r = wh[:, None] / k[None]
31 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric
32 | best = x.max(1)[0] # best_x
33 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold
34 | bpr = (best > 1. / thr).float().mean() # best possible recall
35 | return bpr, aat
36 |
37 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
38 | print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')
39 | if bpr < 0.98: # threshold to recompute
40 | print('. Attempting to improve anchors, please wait...')
41 | na = m.anchor_grid.numel() // 2 # number of anchors
42 | new_anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False)
43 | new_bpr = metric(new_anchors.reshape(-1, 2))[0]
44 | if new_bpr > bpr: # replace anchors
45 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(m.anchors)
46 | m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference
47 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
48 | check_anchor_order(m)
49 | print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
50 | else:
51 | print('Original anchors better than new anchors. Proceeding with original anchors.')
52 | print('') # newline
53 |
54 |
55 | def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True):
56 | """ Creates kmeans-evolved anchors from training dataset
57 |
58 | Arguments:
59 | path: path to dataset *.yaml, or a loaded dataset
60 | n: number of anchors
61 | img_size: image size used for training
62 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
63 | gen: generations to evolve anchors using genetic algorithm
64 | verbose: print all results
65 |
66 | Return:
67 | k: kmeans evolved anchors
68 |
69 | Usage:
70 | from utils.autoanchor import *; _ = kmean_anchors()
71 | """
72 | thr = 1. / thr
73 |
74 | def metric(k, wh): # compute metrics
75 | r = wh[:, None] / k[None]
76 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric
77 | # x = wh_iou(wh, torch.tensor(k)) # iou metric
78 | return x, x.max(1)[0] # x, best_x
79 |
80 | def anchor_fitness(k): # mutation fitness
81 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
82 | return (best * (best > thr).float()).mean() # fitness
83 |
84 | def print_results(k):
85 | k = k[np.argsort(k.prod(1))] # sort small to large
86 | x, best = metric(k, wh0)
87 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
88 | print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
89 | print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
90 | (n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
91 | for i, x in enumerate(k):
92 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
93 | return k
94 |
95 | if isinstance(path, str): # *.yaml file
96 | with open(path) as f:
97 | data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
98 | from utils.datasets import LoadImagesAndLabels
99 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
100 | else:
101 | dataset = path # dataset
102 |
103 | # Get label wh
104 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
105 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
106 |
107 | # Filter
108 | i = (wh0 < 3.0).any(1).sum()
109 | if i:
110 | print('WARNING: Extremely small objects found. '
111 | '%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
112 |     wh = wh0[(wh0 >= 2.0).any(1)]  # keep labels with width or height >= 2 pixels
113 |
114 | # Kmeans calculation
115 | print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
116 | s = wh.std(0) # sigmas for whitening
117 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
118 | k *= s
119 | wh = torch.tensor(wh, dtype=torch.float32) # filtered
120 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
121 | k = print_results(k)
122 |
123 | # Plot
124 | # k, d = [None] * 20, [None] * 20
125 | # for i in tqdm(range(1, 21)):
126 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
127 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
128 | # ax = ax.ravel()
129 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
130 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
131 | # ax[0].hist(wh[wh[:, 0]<100, 0],400)
132 | # ax[1].hist(wh[wh[:, 1]<100, 1],400)
133 | # fig.savefig('wh.png', dpi=200)
134 |
135 | # Evolve
136 | npr = np.random
137 |     f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1  # fitness, anchor shape, mutation prob, sigma
138 | pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar
139 | for _ in pbar:
140 | v = np.ones(sh)
141 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
142 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0)
143 | kg = (k.copy() * v).clip(min=2.0)
144 | fg = anchor_fitness(kg)
145 | if fg > f:
146 | f, k = fg, kg.copy()
147 | pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
148 | if verbose:
149 | print_results(k)
150 |
151 | return print_results(k)
152 |
--------------------------------------------------------------------------------
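Following the Usage note in the kmean_anchors docstring above, a minimal invocation against the repo's sample dataset config (the coco128 images and labels must be available locally for the dataset to load):

    from utils.autoanchor import kmean_anchors

    # Evolve 9 anchors for 640px training images from the coco128 sample dataset.
    anchors = kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000)
    print(anchors)  # (9, 2) array of (w, h) pairs, sorted small to large, for a model *.yaml
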
/utils/google_app_engine/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM gcr.io/google-appengine/python
2 |
3 | # Create a virtualenv for dependencies. This isolates these packages from
4 | # system-level packages.
5 | # Use -p python3 or -p python3.7 to select python version. Default is version 2.
6 | RUN virtualenv /env -p python3
7 |
8 | # Setting these environment variables are the same as running
9 | # source /env/bin/activate.
10 | ENV VIRTUAL_ENV /env
11 | ENV PATH /env/bin:$PATH
12 |
13 | RUN apt-get update && apt-get install -y python-opencv
14 |
15 | # Copy the application's requirements.txt and run pip to install all
16 | # dependencies into the virtualenv.
17 | ADD requirements.txt /app/requirements.txt
18 | RUN pip install -r /app/requirements.txt
19 |
20 | # Add the application source code.
21 | ADD . /app
22 |
23 | # Run a WSGI server to serve the application. gunicorn must be declared as
24 | # a dependency in requirements.txt.
25 | CMD gunicorn -b :$PORT main:app
26 |
--------------------------------------------------------------------------------
/utils/google_app_engine/additional_requirements.txt:
--------------------------------------------------------------------------------
1 | # add these requirements in your app on top of the existing ones
2 | pip==19.2
3 | Flask==2.3.2
4 | gunicorn==19.9.0
5 |
--------------------------------------------------------------------------------
/utils/google_app_engine/app.yaml:
--------------------------------------------------------------------------------
1 | runtime: custom
2 | env: flex
3 |
4 | service: yolov5app
5 |
6 | liveness_check:
7 | initial_delay_sec: 600
8 |
9 | manual_scaling:
10 | instances: 1
11 | resources:
12 | cpu: 1
13 | memory_gb: 4
14 | disk_size_gb: 20
--------------------------------------------------------------------------------
/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries
2 |
3 | import os
4 | import platform
5 | import subprocess
6 | import time
7 | from pathlib import Path
8 |
9 | import torch
10 |
11 |
12 | def gsutil_getsize(url=''):
13 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
14 | s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8')
15 | return eval(s.split(' ')[0]) if len(s) else 0 # bytes
16 |
17 |
18 | def attempt_download(weights):
19 | # Attempt to download pretrained weights if not found locally
20 | weights = str(weights).strip().replace("'", '')
21 | file = Path(weights).name.lower()
22 |
23 | msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/'
24 | models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models
25 | redundant = False # offer second download option
26 |
27 | if file in models and not os.path.isfile(weights):
28 | # Google Drive
29 | # d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO',
30 | # 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr',
31 | # 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV',
32 | # 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'}
33 | # r = gdrive_download(id=d[file], name=weights) if file in d else 1
34 | # if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check
35 | # return
36 |
37 | try: # GitHub
38 | url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file
39 | print('Downloading %s to %s...' % (url, weights))
40 | torch.hub.download_url_to_file(url, weights)
41 | assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check
42 | except Exception as e: # GCP
43 | print('Download error: %s' % e)
44 | assert redundant, 'No secondary mirror'
45 | url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file
46 | print('Downloading %s to %s...' % (url, weights))
47 | r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights)
48 | finally:
49 | if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check
50 | os.remove(weights) if os.path.exists(weights) else None # remove partial downloads
51 | print('ERROR: Download failure: %s' % msg)
52 | print('')
53 | return
54 |
55 |
56 | def gdrive_download(id='1uH2BylpFxHKEGXKL6wJJlsgMU2YEjxuc', name='tmp.zip'):
57 | # Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download()
58 | t = time.time()
59 |
60 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
61 | os.remove(name) if os.path.exists(name) else None # remove existing
62 | os.remove('cookie') if os.path.exists('cookie') else None
63 |
64 | # Attempt file download
65 | out = "NUL" if platform.system() == "Windows" else "/dev/null"
66 | os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
67 | if os.path.exists('cookie'): # large file
68 | s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
69 | else: # small file
70 | s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
71 | r = os.system(s) # execute, capture return
72 | os.remove('cookie') if os.path.exists('cookie') else None
73 |
74 | # Error check
75 | if r != 0:
76 | os.remove(name) if os.path.exists(name) else None # remove partial
77 | print('Download error ') # raise Exception('Download error')
78 | return r
79 |
80 | # Unzip if archive
81 | if name.endswith('.zip'):
82 | print('unzipping... ', end='')
83 | os.system('unzip -q %s' % name) # unzip
84 | os.remove(name) # remove zip to free space
85 |
86 | print('Done (%.1fs)' % (time.time() - t))
87 | return r
88 |
89 |
90 | def get_token(cookie="./cookie"):
91 | with open(cookie) as f:
92 | for line in f:
93 | if "download" in line:
94 | return line.split()[-1]
95 | return ""
96 |
97 | # def upload_blob(bucket_name, source_file_name, destination_blob_name):
98 | # # Uploads a file to a bucket
99 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
100 | #
101 | # storage_client = storage.Client()
102 | # bucket = storage_client.get_bucket(bucket_name)
103 | # blob = bucket.blob(destination_blob_name)
104 | #
105 | # blob.upload_from_filename(source_file_name)
106 | #
107 | # print('File {} uploaded to {}.'.format(
108 | # source_file_name,
109 | # destination_blob_name))
110 | #
111 | #
112 | # def download_blob(bucket_name, source_blob_name, destination_file_name):
113 | # # Uploads a blob from a bucket
114 | # storage_client = storage.Client()
115 | # bucket = storage_client.get_bucket(bucket_name)
116 | # blob = bucket.blob(source_blob_name)
117 | #
118 | # blob.download_to_filename(destination_file_name)
119 | #
120 | # print('Blob {} downloaded to {}.'.format(
121 | # source_blob_name,
122 | # destination_file_name))
123 |
--------------------------------------------------------------------------------
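A minimal sketch of attempt_download above: it is a no-op when the file already exists, otherwise it pulls the matching asset from the ultralytics v3.1 release:

    import os
    from utils.google_utils import attempt_download

    attempt_download('yolov5s.pt')        # fetches the release asset if not found locally
    print(os.path.exists('yolov5s.pt'))   # True once the download (or a local copy) is in place
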
/utils/loss.py:
--------------------------------------------------------------------------------
1 | # Loss functions
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | from utils.general import bbox_iou
7 | from utils.torch_utils import is_parallel
8 |
9 |
10 | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441
11 | # return positive, negative label smoothing BCE targets
12 | return 1.0 - 0.5 * eps, 0.5 * eps
13 |
14 |
15 | class BCEBlurWithLogitsLoss(nn.Module):
16 | # BCEwithLogitLoss() with reduced missing label effects.
17 | def __init__(self, alpha=0.05):
18 | super(BCEBlurWithLogitsLoss, self).__init__()
19 | self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss()
20 | self.alpha = alpha
21 |
22 | def forward(self, pred, true):
23 | loss = self.loss_fcn(pred, true)
24 | pred = torch.sigmoid(pred) # prob from logits
25 | dx = pred - true # reduce only missing label effects
26 | # dx = (pred - true).abs() # reduce missing label and false label effects
27 | alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
28 | loss *= alpha_factor
29 | return loss.mean()
30 |
31 |
32 | class FocalLoss(nn.Module):
33 | # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
34 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
35 | super(FocalLoss, self).__init__()
36 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
37 | self.gamma = gamma
38 | self.alpha = alpha
39 | self.reduction = loss_fcn.reduction
40 | self.loss_fcn.reduction = 'none' # required to apply FL to each element
41 |
42 | def forward(self, pred, true):
43 | loss = self.loss_fcn(pred, true)
44 | # p_t = torch.exp(-loss)
45 | # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
46 |
47 | # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
48 | pred_prob = torch.sigmoid(pred) # prob from logits
49 | p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
50 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
51 | modulating_factor = (1.0 - p_t) ** self.gamma
52 | loss *= alpha_factor * modulating_factor
53 |
54 | if self.reduction == 'mean':
55 | return loss.mean()
56 | elif self.reduction == 'sum':
57 | return loss.sum()
58 | else: # 'none'
59 | return loss
60 |
61 |
62 | class QFocalLoss(nn.Module):
63 | # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
64 | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
65 | super(QFocalLoss, self).__init__()
66 | self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
67 | self.gamma = gamma
68 | self.alpha = alpha
69 | self.reduction = loss_fcn.reduction
70 | self.loss_fcn.reduction = 'none' # required to apply FL to each element
71 |
72 | def forward(self, pred, true):
73 | loss = self.loss_fcn(pred, true)
74 |
75 | pred_prob = torch.sigmoid(pred) # prob from logits
76 | alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
77 | modulating_factor = torch.abs(true - pred_prob) ** self.gamma
78 | loss *= alpha_factor * modulating_factor
79 |
80 | if self.reduction == 'mean':
81 | return loss.mean()
82 | elif self.reduction == 'sum':
83 | return loss.sum()
84 | else: # 'none'
85 | return loss
86 |
87 |
88 | def compute_loss(p, targets, model): # predictions, targets, model
89 | device = targets.device
90 | lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
91 | tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets
92 | h = model.hyp # hyperparameters
93 |
94 | # Define criteria
95 | BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights)
96 | BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))
97 |
98 | # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
99 | cp, cn = smooth_BCE(eps=0.0)
100 |
101 | # Focal loss
102 | g = h['fl_gamma'] # focal loss gamma
103 | if g > 0:
104 | BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
105 |
106 | # Losses
107 | nt = 0 # number of targets
108 | no = len(p) # number of outputs
109 | balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
110 | for i, pi in enumerate(p): # layer index, layer predictions
111 | b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
112 | tobj = torch.zeros_like(pi[..., 0], device=device) # target obj
113 |
114 | n = b.shape[0] # number of targets
115 | if n:
116 | nt += n # cumulative targets
117 | ps = pi[b, a, gj, gi] # prediction subset corresponding to targets
118 |
119 | # Regression
120 | pxy = ps[:, :2].sigmoid() * 2. - 0.5
121 | pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
122 | pbox = torch.cat((pxy, pwh), 1) # predicted box
123 | iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
124 | lbox += (1.0 - iou).mean() # iou loss
125 |
126 | # Objectness
127 | tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio
128 |
129 | # Classification
130 | if model.nc > 1: # cls loss (only if multiple classes)
131 | t = torch.full_like(ps[:, 5:], cn, device=device) # targets
132 | t[range(n), tcls[i]] = cp
133 | lcls += BCEcls(ps[:, 5:], t) # BCE
134 |
135 | # Append targets to text file
136 | # with open('targets.txt', 'a') as file:
137 | # [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
138 |
139 | lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss
140 |
141 | s = 3 / no # output count scaling
142 | lbox *= h['box'] * s
143 | lobj *= h['obj'] * s * (1.4 if no == 4 else 1.)
144 | lcls *= h['cls'] * s
145 | bs = tobj.shape[0] # batch size
146 |
147 | loss = lbox + lobj + lcls
148 | return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()
149 |
150 |
151 | def build_targets(p, targets, model):
152 | # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
153 | det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module
154 | na, nt = det.na, targets.shape[0] # number of anchors, targets
155 | tcls, tbox, indices, anch = [], [], [], []
156 | gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
157 | ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
158 | targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices
159 |
160 | g = 0.5 # bias
161 | off = torch.tensor([[0, 0],
162 | [1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
163 | # [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
164 | ], device=targets.device).float() * g # offsets
165 |
166 | for i in range(det.nl):
167 | anchors = det.anchors[i]
168 | gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain
169 |
170 | # Match targets to anchors
171 | t = targets * gain
172 | if nt:
173 | # Matches
174 | r = t[:, :, 4:6] / anchors[:, None] # wh ratio
175 | j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare
176 | # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
177 | t = t[j] # filter
178 |
179 | # Offsets
180 | gxy = t[:, 2:4] # grid xy
181 | gxi = gain[[2, 3]] - gxy # inverse
182 | j, k = ((gxy % 1. < g) & (gxy > 1.)).T
183 | l, m = ((gxi % 1. < g) & (gxi > 1.)).T
184 | j = torch.stack((torch.ones_like(j), j, k, l, m))
185 | t = t.repeat((5, 1, 1))[j]
186 | offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
187 | else:
188 | t = targets[0]
189 | offsets = 0
190 |
191 | # Define
192 | b, c = t[:, :2].long().T # image, class
193 | gxy = t[:, 2:4] # grid xy
194 | gwh = t[:, 4:6] # grid wh
195 | gij = (gxy - offsets).long()
196 | gi, gj = gij.T # grid xy indices
197 |
198 | # Append
199 | a = t[:, 6].long() # anchor indices
200 | indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
201 | tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
202 | anch.append(anchors[a]) # anchors
203 | tcls.append(c) # class
204 |
205 | return tcls, tbox, indices, anch
206 |
--------------------------------------------------------------------------------
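For orientation, a minimal sketch of how compute_loss above is typically driven from a training step. The training loop is not part of this dump, so model, dataloader and device are placeholders; the model is assumed to expose the .hyp, .gr and .nc attributes the loss reads, and targets are (image_idx, class, x, y, w, h) rows as build_targets expects:

    import torch
    from utils.loss import compute_loss

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.937)
    model.train()

    for imgs, targets, paths, _ in dataloader:     # placeholder YOLOv5-style dataloader
        imgs = imgs.to(device).float() / 255.0     # uint8 -> float32 in [0, 1]
        pred = model(imgs)                         # list of per-scale prediction tensors
        loss, loss_items = compute_loss(pred, targets.to(device), model)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
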
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | # Model validation metrics
2 |
3 | from pathlib import Path
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import torch
8 |
9 | from . import general
10 |
11 |
12 | def fitness(x):
13 | # Model fitness as a weighted combination of metrics
14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
15 | return (x[:, :4] * w).sum(1)
16 |
17 |
18 | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
19 | """ Compute the average precision, given the recall and precision curves.
20 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
21 | # Arguments
22 | tp: True positives (nparray, nx1 or nx10).
23 | conf: Objectness value from 0-1 (nparray).
24 | pred_cls: Predicted object classes (nparray).
25 | target_cls: True object classes (nparray).
26 | plot: Plot precision-recall curve at mAP@0.5
27 | save_dir: Plot save directory
28 | # Returns
29 | The average precision as computed in py-faster-rcnn.
30 | """
31 |
32 | # Sort by objectness
33 | i = np.argsort(-conf)
34 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
35 |
36 | # Find unique classes
37 | unique_classes = np.unique(target_cls)
38 |
39 | # Create Precision-Recall curve and compute AP for each class
40 | px, py = np.linspace(0, 1, 1000), [] # for plotting
41 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
42 | s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
43 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
44 | for ci, c in enumerate(unique_classes):
45 | i = pred_cls == c
46 | n_l = (target_cls == c).sum() # number of labels
47 | n_p = i.sum() # number of predictions
48 |
49 | if n_p == 0 or n_l == 0:
50 | continue
51 | else:
52 | # Accumulate FPs and TPs
53 | fpc = (1 - tp[i]).cumsum(0)
54 | tpc = tp[i].cumsum(0)
55 |
56 | # Recall
57 | recall = tpc / (n_l + 1e-16) # recall curve
58 | r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases
59 |
60 | # Precision
61 | precision = tpc / (tpc + fpc) # precision curve
62 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score
63 |
64 | # AP from recall-precision curve
65 | for j in range(tp.shape[1]):
66 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
67 | if plot and (j == 0):
68 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
69 |
70 | # Compute F1 score (harmonic mean of precision and recall)
71 | f1 = 2 * p * r / (p + r + 1e-16)
72 |
73 | if plot:
74 | plot_pr_curve(px, py, ap, save_dir, names)
75 |
76 | return p, r, ap, f1, unique_classes.astype('int32')
77 |
78 |
79 | def compute_ap(recall, precision):
80 | """ Compute the average precision, given the recall and precision curves
81 | # Arguments
82 | recall: The recall curve (list)
83 | precision: The precision curve (list)
84 | # Returns
85 | Average precision, precision curve, recall curve
86 | """
87 |
88 | # Append sentinel values to beginning and end
89 | mrec = np.concatenate(([0.], recall, [recall[-1] + 0.01]))
90 | mpre = np.concatenate(([1.], precision, [0.]))
91 |
92 | # Compute the precision envelope
93 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
94 |
95 | # Integrate area under curve
96 | method = 'interp' # methods: 'continuous', 'interp'
97 | if method == 'interp':
98 | x = np.linspace(0, 1, 101) # 101-point interp (COCO)
99 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
100 | else: # 'continuous'
101 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
102 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
103 |
104 | return ap, mpre, mrec
105 |
106 |
107 | class ConfusionMatrix:
108 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
109 | def __init__(self, nc, conf=0.25, iou_thres=0.45):
110 | self.matrix = np.zeros((nc + 1, nc + 1))
111 | self.nc = nc # number of classes
112 | self.conf = conf
113 | self.iou_thres = iou_thres
114 |
115 | def process_batch(self, detections, labels):
116 | """
117 | Return intersection-over-union (Jaccard index) of boxes.
118 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
119 | Arguments:
120 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class
121 | labels (Array[M, 5]), class, x1, y1, x2, y2
122 | Returns:
123 | None, updates confusion matrix accordingly
124 | """
125 | detections = detections[detections[:, 4] > self.conf]
126 | gt_classes = labels[:, 0].int()
127 | detection_classes = detections[:, 5].int()
128 | iou = general.box_iou(labels[:, 1:], detections[:, :4])
129 |
130 | x = torch.where(iou > self.iou_thres)
131 | if x[0].shape[0]:
132 | matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()
133 | if x[0].shape[0] > 1:
134 | matches = matches[matches[:, 2].argsort()[::-1]]
135 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
136 | matches = matches[matches[:, 2].argsort()[::-1]]
137 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
138 | else:
139 | matches = np.zeros((0, 3))
140 |
141 | n = matches.shape[0] > 0
142 | m0, m1, _ = matches.transpose().astype(np.int16)
143 | for i, gc in enumerate(gt_classes):
144 | j = m0 == i
145 | if n and sum(j) == 1:
146 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
147 | else:
148 | self.matrix[gc, self.nc] += 1 # background FP
149 |
150 | if n:
151 | for i, dc in enumerate(detection_classes):
152 | if not any(m1 == i):
153 | self.matrix[self.nc, dc] += 1 # background FN
154 |
155 | def matrix(self):
156 | return self.matrix
157 |
158 | def plot(self, save_dir='', names=()):
159 | try:
160 | import seaborn as sn
161 |
162 | array = self.matrix / (self.matrix.sum(0).reshape(1, self.nc + 1) + 1E-6) # normalize
163 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
164 |
165 | fig = plt.figure(figsize=(12, 9), tight_layout=True)
166 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
167 | labels = (0 < len(names) < 99) and len(names) == self.nc # apply names to ticklabels
168 | sn.heatmap(array, annot=self.nc < 30, annot_kws={"size": 8}, cmap='Blues', fmt='.2f', square=True,
169 | xticklabels=names + ['background FN'] if labels else "auto",
170 | yticklabels=names + ['background FP'] if labels else "auto").set_facecolor((1, 1, 1))
171 | fig.axes[0].set_xlabel('True')
172 | fig.axes[0].set_ylabel('Predicted')
173 | fig.savefig(Path(save_dir) / 'confusion_matrix.png', dpi=250)
174 | except Exception as e:
175 | pass
176 |
177 | def print(self):
178 | for i in range(self.nc + 1):
179 | print(' '.join(map(str, self.matrix[i])))
180 |
181 |
182 | # Plots ----------------------------------------------------------------------------------------------------------------
183 |
184 | def plot_pr_curve(px, py, ap, save_dir='.', names=()):
185 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
186 | py = np.stack(py, axis=1)
187 |
188 |     if 0 < len(names) < 21:  # display per-class legend if < 21 classes
189 | for i, y in enumerate(py.T):
190 | ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
191 | else:
192 | ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)
193 |
194 | ax.plot(px, py.mean(1), linewidth=3, color='blue', label='all classes %.3f mAP@0.5' % ap[:, 0].mean())
195 | ax.set_xlabel('Recall')
196 | ax.set_ylabel('Precision')
197 | ax.set_xlim(0, 1)
198 | ax.set_ylim(0, 1)
199 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
200 | fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
201 |
--------------------------------------------------------------------------------
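A toy example of compute_ap above on a hand-made precision/recall curve (the numbers are invented); it returns the 101-point interpolated AP used when reporting mAP@0.5:

    import numpy as np
    from utils.metrics import compute_ap

    recall = np.array([0.1, 0.4, 0.6, 0.8])       # cumulative recall, increasing
    precision = np.array([1.0, 0.9, 0.75, 0.6])   # precision at those operating points
    ap, mpre, mrec = compute_ap(recall, precision)
    print(round(float(ap), 3))
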
/utils/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/roboflow/zero-shot-object-tracking/cbf83e476bf1ed4614bb6b3630820959bdfe1782/utils/models/__init__.py
--------------------------------------------------------------------------------
/utils/models/experimental.py:
--------------------------------------------------------------------------------
1 | # This file contains experimental modules
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 |
7 | from models.common import Conv, DWConv
8 | from utils.google_utils import attempt_download
9 |
10 |
11 | class CrossConv(nn.Module):
12 | # Cross Convolution Downsample
13 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
14 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
15 | super(CrossConv, self).__init__()
16 | c_ = int(c2 * e) # hidden channels
17 | self.cv1 = Conv(c1, c_, (1, k), (1, s))
18 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
19 | self.add = shortcut and c1 == c2
20 |
21 | def forward(self, x):
22 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
23 |
24 |
25 | class Sum(nn.Module):
26 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
27 | def __init__(self, n, weight=False): # n: number of inputs
28 | super(Sum, self).__init__()
29 | self.weight = weight # apply weights boolean
30 | self.iter = range(n - 1) # iter object
31 | if weight:
32 | self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True) # layer weights
33 |
34 | def forward(self, x):
35 | y = x[0] # no weight
36 | if self.weight:
37 | w = torch.sigmoid(self.w) * 2
38 | for i in self.iter:
39 | y = y + x[i + 1] * w[i]
40 | else:
41 | for i in self.iter:
42 | y = y + x[i + 1]
43 | return y
44 |
45 |
46 | class GhostConv(nn.Module):
47 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
48 | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups
49 | super(GhostConv, self).__init__()
50 | c_ = c2 // 2 # hidden channels
51 | self.cv1 = Conv(c1, c_, k, s, None, g, act)
52 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
53 |
54 | def forward(self, x):
55 | y = self.cv1(x)
56 | return torch.cat([y, self.cv2(y)], 1)
57 |
58 |
59 | class GhostBottleneck(nn.Module):
60 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
61 | def __init__(self, c1, c2, k, s):
62 | super(GhostBottleneck, self).__init__()
63 | c_ = c2 // 2
64 | self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
65 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
66 | GhostConv(c_, c2, 1, 1, act=False)) # pw-linear
67 | self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
68 | Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
69 |
70 | def forward(self, x):
71 | return self.conv(x) + self.shortcut(x)
72 |
73 |
74 | class MixConv2d(nn.Module):
75 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
76 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
77 | super(MixConv2d, self).__init__()
78 | groups = len(k)
79 | if equal_ch: # equal c_ per group
80 | i = torch.linspace(0, groups - 1E-6, c2).floor() # c2 indices
81 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
82 | else: # equal weight.numel() per group
83 | b = [c2] + [0] * groups
84 | a = np.eye(groups + 1, groups, k=-1)
85 | a -= np.roll(a, 1, axis=1)
86 | a *= np.array(k) ** 2
87 | a[0] = 1
88 | c_ = np.linalg.lstsq(a, b, rcond=None)[0].round() # solve for equal weight indices, ax = b
89 |
90 | self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
91 | self.bn = nn.BatchNorm2d(c2)
92 | self.act = nn.LeakyReLU(0.1, inplace=True)
93 |
94 | def forward(self, x):
95 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
96 |
97 |
98 | class Ensemble(nn.ModuleList):
99 | # Ensemble of models
100 | def __init__(self):
101 | super(Ensemble, self).__init__()
102 |
103 | def forward(self, x, augment=False):
104 | y = []
105 | for module in self:
106 | y.append(module(x, augment)[0])
107 | # y = torch.stack(y).max(0)[0] # max ensemble
108 | # y = torch.cat(y, 1) # nms ensemble
109 | y = torch.stack(y).mean(0) # mean ensemble
110 | return y, None # inference, train output
111 |
112 |
113 | def attempt_load(weights, map_location=None):
114 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
115 | model = Ensemble()
116 | for w in weights if isinstance(weights, list) else [weights]:
117 | attempt_download(w)
118 | model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval()) # load FP32 model
119 |
120 | # Compatibility updates
121 | for m in model.modules():
122 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
123 | m.inplace = True # pytorch 1.7.0 compatibility
124 | elif type(m) is Conv:
125 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
126 |
127 | if len(model) == 1:
128 | return model[-1] # return model
129 | else:
130 | print('Ensemble created with %s\n' % weights)
131 | for k in ['names', 'stride']:
132 | setattr(model, k, getattr(model[-1], k))
133 | return model # return ensemble
134 |
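attempt_load is the entry point the rest of this repo uses to load YOLOv5 checkpoints. A minimal usage sketch, assuming a yolov5s.pt checkpoint is available locally (otherwise attempt_download will try to fetch it):

import torch
from models.experimental import attempt_load

# Single checkpoint -> returns the fused FP32 model itself.
model = attempt_load('yolov5s.pt', map_location=torch.device('cpu'))

# A list of checkpoints would instead return an Ensemble whose forward()
# averages the per-model predictions (mean ensemble, see Ensemble.forward above):
# ensemble = attempt_load(['yolov5s.pt', 'yolov5m.pt'], map_location=torch.device('cpu'))

img = torch.zeros(1, 3, 640, 640)  # dummy input
pred = model(img)[0]               # raw predictions before NMS
print(pred.shape)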
--------------------------------------------------------------------------------
/utils/models/export.py:
--------------------------------------------------------------------------------
1 | """Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
2 |
3 | Usage:
4 | $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
5 | """
6 |
7 | import argparse
8 | import sys
9 | import time
10 |
11 | sys.path.append('./') # to run '$ python *.py' files in subdirectories
12 |
13 | import torch
14 | import torch.nn as nn
15 |
16 | import models
17 | from models.experimental import attempt_load
18 | from utils.activations import Hardswish, SiLU
19 | from utils.general import set_logging, check_img_size
20 |
21 | if __name__ == '__main__':
22 | parser = argparse.ArgumentParser()
23 | parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
24 | parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
25 | parser.add_argument('--batch-size', type=int, default=1, help='batch size')
26 | opt = parser.parse_args()
27 | opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
28 | print(opt)
29 | set_logging()
30 | t = time.time()
31 |
32 | # Load PyTorch model
33 | model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model
34 | labels = model.names
35 |
36 | # Checks
37 | gs = int(max(model.stride)) # grid size (max stride)
38 | opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples
39 |
40 | # Input
41 | img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection
42 |
43 | # Update model
44 | for k, m in model.named_modules():
45 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
46 | if isinstance(m, models.common.Conv): # assign export-friendly activations
47 | if isinstance(m.act, nn.Hardswish):
48 | m.act = Hardswish()
49 | elif isinstance(m.act, nn.SiLU):
50 | m.act = SiLU()
51 | # elif isinstance(m, models.yolo.Detect):
52 | # m.forward = m.forward_export # assign forward (optional)
53 | model.model[-1].export = True # set Detect() layer export=True
54 | y = model(img) # dry run
55 |
56 | # TorchScript export
57 | try:
58 | print('\nStarting TorchScript export with torch %s...' % torch.__version__)
59 | f = opt.weights.replace('.pt', '.torchscript.pt') # filename
60 | ts = torch.jit.trace(model, img)
61 | ts.save(f)
62 | print('TorchScript export success, saved as %s' % f)
63 | except Exception as e:
64 | print('TorchScript export failure: %s' % e)
65 |
66 | # ONNX export
67 | try:
68 | import onnx
69 |
70 | print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
71 | f = opt.weights.replace('.pt', '.onnx') # filename
72 | torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
73 | output_names=['classes', 'boxes'] if y is None else ['output'])
74 |
75 | # Checks
76 | onnx_model = onnx.load(f) # load onnx model
77 | onnx.checker.check_model(onnx_model) # check onnx model
78 | # print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable model
79 | print('ONNX export success, saved as %s' % f)
80 | except Exception as e:
81 | print('ONNX export failure: %s' % e)
82 |
83 | # CoreML export
84 | try:
85 | import coremltools as ct
86 |
87 | print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
88 | # convert model from torchscript and apply pixel scaling as per detect.py
89 | model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
90 | f = opt.weights.replace('.pt', '.mlmodel') # filename
91 | model.save(f)
92 | print('CoreML export success, saved as %s' % f)
93 | except Exception as e:
94 | print('CoreML export failure: %s' % e)
95 |
96 | # Finish
97 | print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
98 |
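If onnxruntime is installed, the exported graph can be sanity-checked with a dummy input. A hedged sketch, assuming the default yolov5s.pt weights and 640x640 image size were used for the export above:

import numpy as np
import onnxruntime as ort

session = ort.InferenceSession('yolov5s.onnx')
x = np.zeros((1, 3, 640, 640), dtype=np.float32)  # must match --img-size / --batch-size used at export time
outputs = session.run(None, {'images': x})        # 'images' is the input name set by torch.onnx.export above
print([o.shape for o in outputs])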
--------------------------------------------------------------------------------
/utils/models/hub/yolov3-spp.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3-SPP head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, SPP, [512, [5, 9, 13]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/utils/models/hub/yolov3-tiny.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,14, 23,27, 37,58] # P4/16
9 | - [81,82, 135,169, 344,319] # P5/32
10 |
11 | # YOLOv3-tiny backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [[-1, 1, Conv, [16, 3, 1]], # 0
15 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 1-P1/2
16 | [-1, 1, Conv, [32, 3, 1]],
17 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 3-P2/4
18 | [-1, 1, Conv, [64, 3, 1]],
19 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 5-P3/8
20 | [-1, 1, Conv, [128, 3, 1]],
21 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 7-P4/16
22 | [-1, 1, Conv, [256, 3, 1]],
23 | [-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
24 | [-1, 1, Conv, [512, 3, 1]],
25 | [-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11
26 | [-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
27 | ]
28 |
29 | # YOLOv3-tiny head
30 | head:
31 | [[-1, 1, Conv, [1024, 3, 1]],
32 | [-1, 1, Conv, [256, 1, 1]],
33 | [-1, 1, Conv, [512, 3, 1]], # 15 (P5/32-large)
34 |
35 | [-2, 1, Conv, [128, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
38 | [-1, 1, Conv, [256, 3, 1]], # 19 (P4/16-medium)
39 |
40 | [[19, 15], 1, Detect, [nc, anchors]], # Detect(P4, P5)
41 | ]
42 |
--------------------------------------------------------------------------------
/utils/models/hub/yolov3.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # darknet53 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Conv, [32, 3, 1]], # 0
16 | [-1, 1, Conv, [64, 3, 2]], # 1-P1/2
17 | [-1, 1, Bottleneck, [64]],
18 | [-1, 1, Conv, [128, 3, 2]], # 3-P2/4
19 | [-1, 2, Bottleneck, [128]],
20 | [-1, 1, Conv, [256, 3, 2]], # 5-P3/8
21 | [-1, 8, Bottleneck, [256]],
22 | [-1, 1, Conv, [512, 3, 2]], # 7-P4/16
23 | [-1, 8, Bottleneck, [512]],
24 | [-1, 1, Conv, [1024, 3, 2]], # 9-P5/32
25 | [-1, 4, Bottleneck, [1024]], # 10
26 | ]
27 |
28 | # YOLOv3 head
29 | head:
30 | [[-1, 1, Bottleneck, [1024, False]],
31 | [-1, 1, Conv, [512, [1, 1]]],
32 | [-1, 1, Conv, [1024, 3, 1]],
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 1, Conv, [1024, 3, 1]], # 15 (P5/32-large)
35 |
36 | [-2, 1, Conv, [256, 1, 1]],
37 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
38 | [[-1, 8], 1, Concat, [1]], # cat backbone P4
39 | [-1, 1, Bottleneck, [512, False]],
40 | [-1, 1, Bottleneck, [512, False]],
41 | [-1, 1, Conv, [256, 1, 1]],
42 | [-1, 1, Conv, [512, 3, 1]], # 22 (P4/16-medium)
43 |
44 | [-2, 1, Conv, [128, 1, 1]],
45 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
46 | [[-1, 6], 1, Concat, [1]], # cat backbone P3
47 | [-1, 1, Bottleneck, [256, False]],
48 | [-1, 2, Bottleneck, [256, False]], # 27 (P3/8-small)
49 |
50 | [[27, 22, 15], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
51 | ]
52 |
--------------------------------------------------------------------------------
/utils/models/hub/yolov5-fpn.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, Bottleneck, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 6, BottleneckCSP, [1024]], # 9
25 | ]
26 |
27 | # YOLOv5 FPN head
28 | head:
29 | [[-1, 3, BottleneckCSP, [1024, False]], # 10 (P5/32-large)
30 |
31 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
33 | [-1, 1, Conv, [512, 1, 1]],
34 | [-1, 3, BottleneckCSP, [512, False]], # 14 (P4/16-medium)
35 |
36 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
38 | [-1, 1, Conv, [256, 1, 1]],
39 | [-1, 3, BottleneckCSP, [256, False]], # 18 (P3/8-small)
40 |
41 | [[18, 14, 10], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
42 | ]
43 |
--------------------------------------------------------------------------------
/utils/models/hub/yolov5-panet.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 PANet head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/utils/models/yolov5l.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/utils/models/yolov5m.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/utils/models/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/utils/models/yolov5x.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 1.33 # model depth multiple
4 | width_multiple: 1.25 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [10,13, 16,30, 33,23] # P3/8
9 | - [30,61, 62,45, 59,119] # P4/16
10 | - [116,90, 156,198, 373,326] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
16 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
17 | [-1, 3, BottleneckCSP, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
19 | [-1, 9, BottleneckCSP, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
21 | [-1, 9, BottleneckCSP, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
23 | [-1, 1, SPP, [1024, [5, 9, 13]]],
24 | [-1, 3, BottleneckCSP, [1024, False]], # 9
25 | ]
26 |
27 | # YOLOv5 head
28 | head:
29 | [[-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
31 | [[-1, 6], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, BottleneckCSP, [512, False]], # 13
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, 'nearest']],
36 | [[-1, 4], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 14], 1, Concat, [1]], # cat head P4
41 | [-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 10], 1, Concat, [1]], # cat head P5
45 | [-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
46 |
47 | [[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
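The s/m/l/x variants share the same backbone and head layout and differ only in depth_multiple and width_multiple. A rough sketch of how those two numbers scale the per-layer repeat counts and channel widths, mirroring (to the best of my reading) what parse_model in models/yolo.py does:

import math

def make_divisible(x, divisor=8):
    # round the channel count up to the nearest multiple of 8
    return math.ceil(x / divisor) * divisor

def scale(number, channels, depth_multiple, width_multiple):
    # repeats are scaled by depth_multiple, channels by width_multiple
    n = max(round(number * depth_multiple), 1) if number > 1 else number
    c = make_divisible(channels * width_multiple, 8)
    return n, c

# yolov5s (0.33 / 0.50): a "9 x BottleneckCSP [512]" entry becomes 3 repeats of 256 channels
print(scale(9, 512, 0.33, 0.50))   # -> (3, 256)
# yolov5x (1.33 / 1.25): the same entry becomes 12 repeats of 640 channels
print(scale(9, 512, 1.33, 1.25))   # -> (12, 640)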
--------------------------------------------------------------------------------
/utils/roboflow.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import base64
3 | import io
4 | import cv2
5 | from PIL import Image
6 | import numpy as np
7 |
8 |
9 | def predict_image(image, api_key, url, confidence, overlap, idx):
10 | retval, buffer = cv2.imencode('.jpg', image)
11 | img_str = base64.b64encode(buffer)
12 | img_str = img_str.decode("ascii")
13 |
14 | # Construct the URL
15 | upload_url = "".join([
16 | url,
17 | "?api_key=",
18 | api_key,
19 | "&confidence=",
20 | str(confidence),
21 | "&overlap=",
22 | str(overlap),
23 | "&name=",
24 | str(idx),
25 | ".jpg"
26 | ])
27 |
28 | # POST to the API
29 | r = requests.post(upload_url, data=img_str, headers={
30 | "Content-Type": "application/x-www-form-urlencoded"
31 | })
32 |
33 | json = r.json()
34 |
35 | predictions = json["predictions"]
36 | formatted_predictions = []
37 | classes = []
38 |
39 | for pred in predictions:
40 | formatted_pred = [pred["x"], pred["y"], pred["width"], pred["height"], pred["confidence"]]
41 |
42 | # convert to top-left x/y from center
43 | formatted_pred[0] -= formatted_pred[2]/2
44 | formatted_pred[1] -= formatted_pred[3]/2
45 |
46 | formatted_predictions.append(formatted_pred)
47 | classes.append(pred["class"])
48 |
49 | #print(formatted_predictions)
50 |
51 | return formatted_predictions, classes
52 |
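A hedged usage sketch of predict_image; the API key and model endpoint below are placeholders (the exact Hosted Inference URL depends on your Roboflow project), and the image path is illustrative:

import cv2
from utils.roboflow import predict_image

API_KEY = "YOUR_ROBOFLOW_API_KEY"                        # placeholder
MODEL_URL = "https://detect.roboflow.com/your-project/1" # placeholder endpoint

frame = cv2.imread("data/images/bus.jpg")
detections, classes = predict_image(frame, API_KEY, MODEL_URL,
                                    confidence=40, overlap=30, idx=0)
# each detection is [top-left x, top-left y, width, height, confidence]
for det, cls in zip(detections, classes):
    print(cls, det)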
--------------------------------------------------------------------------------
/utils/yolov4.py:
--------------------------------------------------------------------------------
1 | from tool.utils import *
2 | from tool.torch_utils import *
3 | from tool.darknet2pytorch import Darknet
4 | from utils.general import non_max_suppression, xyxy2xywh
5 | import cv2
6 | import torch
7 |
8 | class Yolov4Engine:
9 | def __init__(self, weights, cfgfile, device, names, classes, conf_thres, iou_thres, agnostic_nms, augment, half):
10 | self.model = Darknet(cfgfile)
11 | self.model.load_weights(weights[0])
12 | self.device = device
13 |
14 | if self.device != "cpu":
15 | self.model.cuda()
16 |
17 | self.classes = classes
18 | self.names = load_class_names(names)
19 | self.conf_thres = conf_thres
20 | self.iou_thres = iou_thres
21 | self.augment = augment
22 | self.agnostic_nms = agnostic_nms
23 |
24 | def infer(self, img):
25 | img_resized = cv2.resize(img, (self.model.width, self.model.height))
26 | pred = do_detect(self.model, img_resized, self.conf_thres, self.iou_thres, self.device != "cpu")[0]
27 | return np.array(pred)
28 |
29 | def postprocess(self, pred, img_shape):
30 | height = img_shape[0]
31 | width = img_shape[1]
32 | classes = pred[:, 6].tolist()
33 | for i, cls in enumerate(classes):
34 | classes[i] = self.names[int(cls)]
35 |
36 | dets = pred[:, :5]
37 | for i, det in enumerate(dets):
38 | box = det
39 | x1 = int(box[0] * width)
40 | y1 = int(box[1] * height)
41 | x2 = int(box[2] * width)
42 | y2 = int(box[3] * height)
43 | newDet = [x1, y1, x2 - x1, y2 - y1, det[4]]  # top-left x, top-left y, width, height, confidence
44 | dets[i] = newDet
45 | return pred, classes
46 |
47 |
48 | def nms(self, pred):
49 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
50 | return out
51 |
--------------------------------------------------------------------------------
/utils/yolov5.py:
--------------------------------------------------------------------------------
1 | from models.experimental import attempt_load
2 | from utils.general import non_max_suppression
3 |
4 | class Yolov5Engine:
5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half):
6 | self.model = attempt_load(weights, map_location=device)
7 | if half:
8 | self.model.half()
9 | self.classes = classes
10 | self.conf_thres = conf_thres
11 | self.iou_thres = iou_thres
12 | self.augment = augment
13 | self.agnostic_nms = agnostic_nms
14 |
15 | def infer(self, img):
16 | pred = self.model(img, augment=self.augment)[0]
17 | pred = self.nms(pred)
18 | return pred
19 |
20 | def nms(self, pred):
21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
22 | return out
23 |
24 | def get_names(self):
25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names
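A minimal usage sketch of Yolov5Engine; in practice clip_object_tracker.py constructs it from command-line arguments and feeds it letterboxed, normalized image tensors, so the values below are illustrative:

import torch
from utils.yolov5 import Yolov5Engine

device = torch.device('cpu')
engine = Yolov5Engine('yolov5s.pt', device, classes=None, conf_thres=0.4, iou_thres=0.45,
                      agnostic_nms=False, augment=False, half=False)

img = torch.zeros(1, 3, 640, 640, device=device)  # stand-in for a preprocessed frame
detections = engine.infer(img)                    # list with one [x1, y1, x2, y2, conf, cls] tensor per image
print(engine.get_names()[:5])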
--------------------------------------------------------------------------------
/utils/yolov7.py:
--------------------------------------------------------------------------------
1 | from models.experimental import attempt_load
2 | from utils.general import non_max_suppression
3 |
4 | class Yolov7Engine:
5 | def __init__(self, weights, device, classes, conf_thres, iou_thres, agnostic_nms, augment, half):
6 | self.model = attempt_load(weights, map_location=device)
7 | if half:
8 | self.model.half()
9 | self.classes = classes
10 | self.conf_thres = conf_thres
11 | self.iou_thres = iou_thres
12 | self.augment = augment
13 | self.agnostic_nms = agnostic_nms
14 |
15 | def infer(self, img):
16 | pred = self.model(img, augment=self.augment)[0]
17 | pred = self.nms(pred)
18 | return pred
19 |
20 | def nms(self, pred):
21 | out = non_max_suppression(pred, self.conf_thres, self.iou_thres, classes=self.classes, agnostic=self.agnostic_nms)
22 | return out
23 |
24 | def get_names(self):
25 | return self.model.module.names if hasattr(self.model, 'module') else self.model.names
--------------------------------------------------------------------------------