├── .gitignore ├── LICENSE ├── README.assets ├── 20200412221106751.png ├── 2020041418343015.png ├── 20200415100437671.png └── DeepSort.jpg ├── README.md ├── bak_results.txt ├── cfg ├── csresnext50-panet-spp.cfg ├── darknet19-3cls.cfg ├── darknet19-3l.cfg ├── mobile-yolo-cem.cfg ├── yolov3-1cls.cfg ├── yolov3-attention.cfg ├── yolov3-cbam.cfg ├── yolov3-dla.cfg ├── yolov3-se.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-3cls.cfg ├── yolov3-spp-matrix.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-spp3.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny-3cls.cfg ├── yolov3-tiny-cbam.cfg ├── yolov3-tiny.cfg ├── yolov3-tiny_3l.cfg ├── yolov3.cfg ├── yolov3_5l.cfg └── yolov3s.cfg ├── data ├── gcp.sh ├── get_coco2014.sh ├── get_coco2017.sh └── get_coco_dataset.sh ├── deep_sort.py ├── deep_sort ├── __init__.py ├── deep │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── checkpoint │ │ ├── .gitkeep │ │ └── ckpt.t7 │ ├── eval.py │ ├── feature_extractor.py │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── hacnn.py │ │ ├── inceptionresnetv2.py │ │ ├── inceptionv4.py │ │ ├── mlfn.py │ │ ├── mobilenetv2.py │ │ ├── mudeep.py │ │ ├── nasnet.py │ │ ├── original_model.py │ │ ├── osnet.py │ │ ├── osnet_ain.py │ │ ├── pcb.py │ │ ├── resnet.py │ │ ├── resnet_ibn_a.py │ │ ├── resnet_ibn_b.py │ │ ├── resnetmid.py │ │ ├── senet.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ ├── squeezenet.py │ │ └── xception.py │ ├── oldfeature_extractor.py │ ├── train.py │ ├── train_wo_center.py │ └── utils │ │ ├── assign_train_val.py │ │ ├── center_loss.py │ │ ├── compute_mean_std.py │ │ ├── pre_deep.py │ │ ├── rename_all.py │ │ ├── tsne_vis.py │ │ └── visualize_actmap.py ├── deep_sort.py └── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py ├── detect.py ├── eval_mot.py ├── miniversion ├── cow.names ├── cv2MOT.py ├── models.py ├── predict.py ├── utils │ ├── __init__.py │ ├── adabound.py │ ├── datasets.py │ ├── gcp.sh │ ├── google_utils.py │ ├── parse_config.py │ ├── torch_utils.py │ ├── utils.py │ ├── utils_sort.py │ └── visdom.py └── yolov3-cbam.cfg ├── models.py ├── pre_mot.py ├── predict.py ├── sort.py ├── sort ├── LICENSE ├── README.md ├── __init__.py ├── data │ ├── ADL-Rundle-6 │ │ └── det.txt │ ├── ADL-Rundle-8 │ │ └── det.txt │ ├── ETH-Bahnhof │ │ └── det.txt │ ├── ETH-Pedcross2 │ │ └── det.txt │ ├── ETH-Sunnyday │ │ └── det.txt │ ├── KITTI-13 │ │ └── det.txt │ ├── KITTI-17 │ │ └── det.txt │ ├── PETS09-S2L1 │ │ └── det.txt │ ├── TUD-Campus │ │ └── det.txt │ ├── TUD-Stadtmitte │ │ └── det.txt │ └── Venice-2 │ │ └── det.txt ├── requirements.txt └── sort.py ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── anchor_cluster.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── layers.py ├── parse_config.py ├── process_darklabel.py ├── torch_utils.py ├── tsne_vis.py ├── utils.py ├── utils_sort.py └── visdom.py └── weights ├── download_yolov3_weights.sh └── gcp.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.png 4 | *.bmp 5 | *.tif 6 | *.heic 7 | *.JPG 8 | *.PNG 9 | *.TIF 10 | *.HEIC 11 | *.mp4 12 | *.mov 13 | *.MOV 14 | *.avi 15 | *.data 16 | *.json 17 | 18 | #*.cfg 19 | !cfg/yolov3*.cfg 20 | 21 | storage.googleapis.com 22 | 
runs/* 23 | data/* 24 | !README.assets/*.png 25 | !README.assets/*.jpg 26 | !data/samples/zidane.jpg 27 | !data/samples/bus.jpg 28 | !data/coco.names 29 | !data/coco_paper.names 30 | !data/coco.data 31 | !data/coco_*.data 32 | !data/coco_*.txt 33 | !data/trainvalno5k.shapes 34 | !data/*.sh 35 | 36 | pycocotools/* 37 | results*.txt 38 | gcp_test*.sh 39 | 40 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 41 | *.m~ 42 | *.mat 43 | !targets*.mat 44 | 45 | # Neural Network weights ----------------------------------------------------------------------------------------------- 46 | *.weights 47 | *.pt 48 | *.onnx 49 | *.mlmodel 50 | darknet53.conv.74 51 | yolov3-tiny.conv.15 52 | 53 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 54 | # Byte-compiled / optimized / DLL files 55 | __pycache__/ 56 | *.py[cod] 57 | *$py.class 58 | 59 | # C extensions 60 | *.so 61 | 62 | # Distribution / packaging 63 | .Python 64 | env/ 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | *.egg-info/ 78 | .installed.cfg 79 | *.egg 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .coverage 95 | .coverage.* 96 | .cache 97 | nosetests.xml 98 | coverage.xml 99 | *.cover 100 | .hypothesis/ 101 | 102 | # Translations 103 | *.mo 104 | *.pot 105 | 106 | # Django stuff: 107 | *.log 108 | local_settings.py 109 | 110 | # Flask stuff: 111 | instance/ 112 | .webassets-cache 113 | 114 | # Scrapy stuff: 115 | .scrapy 116 | 117 | # Sphinx documentation 118 | docs/_build/ 119 | 120 | # PyBuilder 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # pyenv 127 | .python-version 128 | 129 | # celery beat schedule file 130 | celerybeat-schedule 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | 156 | 157 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 158 | 159 | # General 160 | .DS_Store 161 | .AppleDouble 162 | .LSOverride 163 | 164 | # Icon must end with two \r 165 | Icon 166 | Icon? 
167 | 168 | # Thumbnails 169 | ._* 170 | 171 | # Files that might appear in the root of a volume 172 | .DocumentRevisions-V100 173 | .fseventsd 174 | .Spotlight-V100 175 | .TemporaryItems 176 | .Trashes 177 | .VolumeIcon.icns 178 | .com.apple.timemachine.donotpresent 179 | 180 | # Directories potentially created on remote AFP share 181 | .AppleDB 182 | .AppleDesktop 183 | Network Trash Folder 184 | Temporary Items 185 | .apdisk 186 | 187 | 188 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 189 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 190 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 191 | 192 | # User-specific stuff: 193 | .idea/* 194 | .idea/**/workspace.xml 195 | .idea/**/tasks.xml 196 | .idea/dictionaries 197 | .html # Bokeh Plots 198 | .pg # TensorFlow Frozen Graphs 199 | .avi # videos 200 | 201 | # Sensitive or high-churn files: 202 | .idea/**/dataSources/ 203 | .idea/**/dataSources.ids 204 | .idea/**/dataSources.local.xml 205 | .idea/**/sqlDataSources.xml 206 | .idea/**/dynamic.xml 207 | .idea/**/uiDesigner.xml 208 | 209 | # Gradle: 210 | .idea/**/gradle.xml 211 | .idea/**/libraries 212 | 213 | # CMake 214 | cmake-build-debug/ 215 | cmake-build-release/ 216 | 217 | # Mongo Explorer plugin: 218 | .idea/**/mongoSettings.xml 219 | 220 | ## File-based project format: 221 | *.iws 222 | 223 | ## Plugin-specific files: 224 | 225 | # IntelliJ 226 | out/ 227 | 228 | # mpeltonen/sbt-idea plugin 229 | .idea_modules/ 230 | 231 | # JIRA plugin 232 | atlassian-ide-plugin.xml 233 | 234 | # Cursive Clojure plugin 235 | .idea/replstate.xml 236 | 237 | # Crashlytics plugin (for Android Studio and IntelliJ) 238 | com_crashlytics_export_strings.xml 239 | crashlytics.properties 240 | crashlytics-build.properties 241 | fabric.properties 242 | -------------------------------------------------------------------------------- /README.assets/20200412221106751.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/20200412221106751.png -------------------------------------------------------------------------------- /README.assets/2020041418343015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/2020041418343015.png -------------------------------------------------------------------------------- /README.assets/20200415100437671.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/20200415100437671.png -------------------------------------------------------------------------------- /README.assets/DeepSort.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/DeepSort.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEEP SORT YOLOV3 PYTORCH 2 | 3 | 
Published paper: [1] Zhang Hongming, Wang Run, Dong Peijie, Sun Hongguang, Li Shuqin, Wang Hongyan. Multi-object tracking of beef cattle based on the LSRCEM-YOLO algorithm [J/OL]. Transactions of the Chinese Society for Agricultural Machinery: 1-14 [2022-03-07]. https://kns-cnki-net-s.nudtproxy.yitlink.com:443/kcms/detail/11.1964.S.20210223.0955.004.html. 4 | 5 | 6 | ## New Features 7 | 8 | - Added the commonly used attention modules CBAM and SE to the object detection part 9 | 10 | - Added an OpenCV-based object tracking algorithm that uses YOLOv3 for detection on the first frame (in the miniversion folder). 11 | 12 | - Added the SORT algorithm 13 | 14 | - Improved the training of the ReID part 15 | 16 | ## Quick Start 17 | 18 | - [Building the DeepSort annotation format and a ReID dataset with DarkLabel](https://zhuanlan.zhihu.com/p/137430266) 19 | 20 | - [Deep SORT multi-object tracking code walkthrough (Part 1)](https://zhuanlan.zhihu.com/p/133678626) 21 | 22 | - [Deep SORT multi-object tracking code walkthrough (Part 2)](https://zhuanlan.zhihu.com/p/133689982) 23 | 24 | 25 | ## Project Structure 26 | 27 | cfg: storage location for the network structure (.cfg) files 28 | 29 | deep_sort 30 | 31 | - deep: ReID module, taken from https://github.com/pprp/reid_for_deepsort 32 | - sort: Deep SORT reuses several modules from SORT; this is the core part 33 | 34 | miniversion: tracking with the cv2 tracker modules plus YOLOv3; the results are relatively poor 35 | 36 | sort: dependency files required by the SORT algorithm 37 | 38 | utils: packages from YOLOv3 39 | 40 | weights: storage location for the YOLOv3 weights 41 | 42 | deep_sort.py: runs the whole object tracking process with Deep SORT and saves the tracking result (video file) 43 | 44 | detect.py: inherited from YOLOv3, used for object detection. 45 | 46 | pre_mot.py: runs tracking and saves the result files. 47 | 48 | eval_mot.py: evaluates the tracking result files and computes the metrics. 49 | 50 | models.py: inherited from YOLOv3, the model construction code. 51 | 52 | predict.py: inherited from YOLOv3, detects a single image. 53 | 54 | sort.py: entry point that invokes the SORT algorithm 55 | 56 | train.py: trains YOLOv3 57 | 58 | test.py: tests YOLOv3 59 | 60 | 61 | 62 | ## Code Annotations 63 | 64 | The complete walkthrough "Deep SORT Multi-Object Tracking Code Explained" was first published on the GiantPandaCV WeChat official account; you are welcome to follow it. 65 | 66 | Annotations are provided for most of the code in the deep_sort folder; the class diagram below was drawn from the code: 67 | 68 | ![DeepSort](README.assets/DeepSort.jpg) 69 | 70 | State transitions: 71 | 72 | ![State transition diagram](README.assets/20200415100437671.png) 73 | 74 | Overall framework: 75 | 76 | ![Image from Zhihu user Harlek](README.assets/20200412221106751.png) 77 | 78 | Flowchart: 79 | 80 | ![Deep SORT flowchart summarized by Zhihu user @猫弟](README.assets/2020041418343015.png) 81 | 82 | ## References 83 | 84 | Object detection: based on Ultralytics YOLOv3 (a fairly early version) https://github.com/ultralytics/yolov3 85 | 86 | ReID part: https://github.com/pprp/reid_for_deepsort 87 | 88 | Deep SORT reference: https://github.com/ZQPei/deep_sort_pytorch 89 | 90 | SORT reference: https://github.com/abewley/sort 91 | 92 | 93 | -------------------------------------------------------------------------------- /cfg/darknet19-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=448 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | # 1 17 | [convolutional] 18 | batch_normalize=1 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | # 3 30 | [convolutional] 31 | batch_normalize=1 32 | filters=64 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=leaky 37 | 38 | [maxpool] 39 | size=2 40 | stride=2 41 | 42 | # 5 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | # 7 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | # 9 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | # 11 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1
95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | # 13 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | # 15 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | # 17 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | # 19 150 | [convolutional] 151 | batch_normalize=1 152 | filters=1024 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | batch_normalize=1 160 | filters=512 161 | size=1 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | # 21 167 | [convolutional] 168 | batch_normalize=1 169 | filters=1024 170 | size=3 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=512 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | #23 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | ######################## 193 | 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=512 198 | size=1 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | size=3 206 | stride=1 207 | pad=1 208 | filters=1024 209 | activation=leaky 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | filters=512 214 | size=1 215 | stride=1 216 | pad=1 217 | activation=leaky 218 | 219 | [convolutional] 220 | batch_normalize=1 221 | size=3 222 | stride=1 223 | pad=1 224 | filters=1024 225 | activation=leaky 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=1 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | batch_normalize=1 237 | size=3 238 | stride=1 239 | pad=1 240 | filters=1024 241 | activation=leaky 242 | 243 | [convolutional] 244 | size=1 245 | stride=1 246 | pad=1 247 | filters=255 248 | activation=linear 249 | 250 | 251 | [yolo] 252 | mask = 6,7,8 253 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 254 | classes=80 255 | num=9 256 | jitter=.3 257 | ignore_thresh = .7 258 | truth_thresh = 1 259 | random=1 260 | 261 | 262 | [route] 263 | layers = -4 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=256 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [upsample] 274 | stride=2 275 | 276 | [route] 277 | layers = -1, 17 278 | 279 | 280 | 281 | [convolutional] 282 | batch_normalize=1 283 | filters=256 284 | size=1 285 | stride=1 286 | pad=1 287 | activation=leaky 288 | 289 | [convolutional] 290 | batch_normalize=1 291 | size=3 292 | stride=1 293 | pad=1 294 | filters=512 295 | activation=leaky 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=256 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=512 311 | activation=leaky 312 | 313 | [convolutional] 314 | 
batch_normalize=1 315 | filters=256 316 | size=1 317 | stride=1 318 | pad=1 319 | activation=leaky 320 | 321 | [convolutional] 322 | batch_normalize=1 323 | size=3 324 | stride=1 325 | pad=1 326 | filters=512 327 | activation=leaky 328 | 329 | [convolutional] 330 | size=1 331 | stride=1 332 | pad=1 333 | filters=255 334 | activation=linear 335 | 336 | 337 | [yolo] 338 | mask = 3,4,5 339 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 340 | classes=80 341 | num=9 342 | jitter=.3 343 | ignore_thresh = .7 344 | truth_thresh = 1 345 | random=1 346 | 347 | 348 | 349 | [route] 350 | layers = -4 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=1 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [upsample] 361 | stride=2 362 | 363 | [route] 364 | layers = -1, 11 365 | 366 | 367 | 368 | [convolutional] 369 | batch_normalize=1 370 | filters=128 371 | size=1 372 | stride=1 373 | pad=1 374 | activation=leaky 375 | 376 | [convolutional] 377 | batch_normalize=1 378 | size=3 379 | stride=1 380 | pad=1 381 | filters=256 382 | activation=leaky 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=leaky 391 | 392 | [convolutional] 393 | batch_normalize=1 394 | size=3 395 | stride=1 396 | pad=1 397 | filters=256 398 | activation=leaky 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=128 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | size=3 411 | stride=1 412 | pad=1 413 | filters=256 414 | activation=leaky 415 | 416 | [convolutional] 417 | size=1 418 | stride=1 419 | pad=1 420 | filters=255 421 | activation=linear 422 | 423 | 424 | [yolo] 425 | mask = 0,1,2 426 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 427 | classes=80 428 | num=9 429 | jitter=.3 430 | ignore_thresh = .7 431 | truth_thresh = 1 432 | random=1 433 | -------------------------------------------------------------------------------- /cfg/darknet19-3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=448 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | # 1 17 | [convolutional] 18 | batch_normalize=1 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | # 3 30 | [convolutional] 31 | batch_normalize=1 32 | filters=64 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=leaky 37 | 38 | [maxpool] 39 | size=2 40 | stride=2 41 | 42 | # 5 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | # 7 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | # 9 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | # 11 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 
| stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | # 13 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | # 15 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | # 17 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | # 19 150 | [convolutional] 151 | batch_normalize=1 152 | filters=1024 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | batch_normalize=1 160 | filters=512 161 | size=1 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | # 21 167 | [convolutional] 168 | batch_normalize=1 169 | filters=1024 170 | size=3 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=512 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | #23 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | ######################## 193 | 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=512 198 | size=1 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | size=3 206 | stride=1 207 | pad=1 208 | filters=1024 209 | activation=leaky 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | filters=512 214 | size=1 215 | stride=1 216 | pad=1 217 | activation=leaky 218 | 219 | [convolutional] 220 | batch_normalize=1 221 | size=3 222 | stride=1 223 | pad=1 224 | filters=1024 225 | activation=leaky 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=1 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | batch_normalize=1 237 | size=3 238 | stride=1 239 | pad=1 240 | filters=1024 241 | activation=leaky 242 | 243 | [convolutional] 244 | size=1 245 | stride=1 246 | pad=1 247 | filters=18 248 | activation=linear 249 | 250 | 251 | [yolo] 252 | mask = 6,7,8 253 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 254 | classes=1 255 | num=9 256 | jitter=.3 257 | ignore_thresh = .7 258 | truth_thresh = 1 259 | random=1 260 | 261 | 262 | [route] 263 | layers = -4 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=256 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [upsample] 274 | stride=2 275 | 276 | [route] 277 | layers = -1, 16 278 | 279 | 280 | 281 | [convolutional] 282 | batch_normalize=1 283 | filters=256 284 | size=1 285 | stride=1 286 | pad=1 287 | activation=leaky 288 | 289 | [convolutional] 290 | batch_normalize=1 291 | size=3 292 | stride=1 293 | pad=1 294 | filters=512 295 | activation=leaky 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=256 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=512 311 | activation=leaky 312 | 313 | 
[convolutional] 314 | batch_normalize=1 315 | filters=256 316 | size=1 317 | stride=1 318 | pad=1 319 | activation=leaky 320 | 321 | [convolutional] 322 | batch_normalize=1 323 | size=3 324 | stride=1 325 | pad=1 326 | filters=512 327 | activation=leaky 328 | 329 | [convolutional] 330 | size=1 331 | stride=1 332 | pad=1 333 | filters=18 334 | activation=linear 335 | 336 | 337 | [yolo] 338 | mask = 3,4,5 339 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 340 | classes=1 341 | num=9 342 | jitter=.3 343 | ignore_thresh = .7 344 | truth_thresh = 1 345 | random=1 346 | 347 | 348 | 349 | [route] 350 | layers = -4 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=1 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [upsample] 361 | stride=2 362 | 363 | [route] 364 | layers = -1, 10 365 | 366 | 367 | 368 | [convolutional] 369 | batch_normalize=1 370 | filters=128 371 | size=1 372 | stride=1 373 | pad=1 374 | activation=leaky 375 | 376 | [convolutional] 377 | batch_normalize=1 378 | size=3 379 | stride=1 380 | pad=1 381 | filters=256 382 | activation=leaky 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=leaky 391 | 392 | [convolutional] 393 | batch_normalize=1 394 | size=3 395 | stride=1 396 | pad=1 397 | filters=256 398 | activation=leaky 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=128 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | size=3 411 | stride=1 412 | pad=1 413 | filters=256 414 | activation=leaky 415 | 416 | [convolutional] 417 | size=1 418 | stride=1 419 | pad=1 420 | filters=18 421 | activation=linear 422 | 423 | 424 | [yolo] 425 | mask = 0,1,2 426 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 427 | classes=1 428 | num=9 429 | jitter=.3 430 | ignore_thresh = .7 431 | truth_thresh = 1 432 | random=1 433 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 
91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-cbam.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [cbam] 35 | size=7 36 | 37 | [maxpool] 38 | size=2 39 | stride=2 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=32 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [cbam] 50 | size=7 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [cbam] 65 | size=7 66 | 67 | [maxpool] 68 | size=2 69 | stride=2 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=128 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [cbam] 80 | size=7 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=256 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [cbam] 95 | size=7 96 | 97 | [maxpool] 98 | size=2 99 | stride=2 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=512 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [cbam] 110 | size=7 111 | 112 | [maxpool] 113 | size=2 114 | stride=1 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=1024 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [cbam] 125 | size=7 126 | 127 | ########### 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=512 141 | size=3 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | 147 | 148 | [convolutional] 149 | size=1 150 | stride=1 151 | pad=1 152 | filters=18 153 | activation=linear 154 | 155 | [yolo] 156 | mask = 6,7,8 157 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 158 | classes=1 159 | num=9 160 | 
jitter=.3 161 | ignore_thresh = .7 162 | truth_thresh = 1 163 | random=1 164 | 165 | [route] 166 | layers = -6 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=128 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | 177 | [upsample] 178 | stride=2 179 | 180 | [route] 181 | layers = -1, 8 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=256 186 | size=3 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | 192 | [convolutional] 193 | size=1 194 | stride=1 195 | pad=1 196 | filters=18 197 | activation=linear 198 | 199 | [yolo] 200 | mask = 3,4,5 201 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 202 | classes=1 203 | num=9 204 | jitter=.3 205 | ignore_thresh = .7 206 | truth_thresh = 1 207 | random=1 208 | 209 | [route] 210 | layers = -5 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=128 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | 221 | [upsample] 222 | stride=2 223 | 224 | [route] 225 | layers = -1, 6 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=128 230 | size=3 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | size=1 237 | stride=1 238 | pad=1 239 | filters=18 240 | activation=linear 241 | 242 | [yolo] 243 | mask = 0,1,2 244 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 245 | classes=1 246 | num=9 247 | jitter=.3 248 | ignore_thresh = .7 249 | truth_thresh = 1 250 | random=1 -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny_3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | [convolutional] 40 | batch_normalize=1 41 | filters=32 42 | size=3 43 | stride=1 44 | pad=1 45 | activation=leaky 46 | 47 | [maxpool] 48 | size=2 49 | stride=2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [maxpool] 60 | size=2 61 | stride=2 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=128 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [maxpool] 72 | size=2 73 | stride=2 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=256 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [maxpool] 84 | size=2 85 | stride=2 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [maxpool] 96 | size=2 97 | stride=1 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=1024 102 | size=3 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | ########### 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=256 112 | size=1 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | size=1 127 | stride=1 128 | pad=1 129 | filters=18 130 | activation=linear 131 | 132 | [yolo] 133 | mask = 6,7,8 134 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 135 | classes=1 136 | num=9 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | 
activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 3,4,5 176 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 177 | classes=1 178 | num=9 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 184 | 185 | 186 | [route] 187 | layers = -3 188 | 189 | [convolutional] 190 | batch_normalize=1 191 | filters=128 192 | size=1 193 | stride=1 194 | pad=1 195 | activation=leaky 196 | 197 | [upsample] 198 | stride=2 199 | 200 | [route] 201 | layers = -1, 6 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | filters=128 206 | size=3 207 | stride=1 208 | pad=1 209 | activation=leaky 210 | 211 | [convolutional] 212 | size=1 213 | stride=1 214 | pad=1 215 | filters=18 216 | activation=linear 217 | 218 | [yolo] 219 | mask = 0,1,2 220 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 221 | classes=1 222 | num=9 223 | jitter=.3 224 | ignore_thresh = .7 225 | truth_thresh = 1 226 | random=1 -------------------------------------------------------------------------------- /data/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf yolov3 weights coco 5 | git clone https://github.com/ultralytics/yolov3 6 | bash yolov3/weights/download_yolov3_weights.sh && cp -r weights yolov3 7 | bash yolov3/data/get_coco_dataset.sh 8 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo reboot now 10 | 11 | # Re-clone 12 | rm -rf yolov3 13 | git clone https://github.com/ultralytics/yolov3 # master 14 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 15 | cp -r weights yolov3 16 | cp -r cocoapi/PythonAPI/pycocotools yolov3 17 | cd yolov3 18 | 19 | # Train 20 | python3 train.py 21 | 22 | # Resume 23 | python3 train.py --resume 24 | 25 | # Detect 26 | python3 detect.py 27 | 28 | # Test 29 | python3 test.py --save-json 30 | 31 | # Git pull 32 | git pull https://github.com/ultralytics/yolov3 # master 33 | git pull https://github.com/ultralytics/yolov3 test # branch 34 | 35 | # Test Darknet training 36 | python3 test.py --weights ../darknet/backup/yolov3.backup 37 | 38 | # Copy latest.pt TO bucket 39 | gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics 40 | 41 | # Copy latest.pt FROM bucket 42 | gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt 43 | wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt 44 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 45 | 46 | # Reproduce tutorials 47 | rm results*.txt # WARNING: removes existing results 48 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt 49 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt 50 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt 51 | python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 52 | python3 -c "from utils import utils; utils.plot_results()" 53 | gsutil cp results*.txt gs://ultralytics 54 | gsutil cp results.png gs://ultralytics 55 | sudo shutdown 56 | 57 | # Unit tests 58 | rm -rf yolov3 59 | git clone https://github.com/ultralytics/yolov3 # master 60 | cp -r weights yolov3 && cd yolov3 61 | python3 detect.py # detect 62 | python3 test.py --data data/coco_32img.data # test 63 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 64 | 65 | # Debug/Development 66 | rm -rf yolov3 67 | git clone https://github.com/ultralytics/yolov3 # master 68 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 69 | cp -r cocoapi/PythonAPI/pycocotools yolov3 70 | cp -r weights yolov3 && cd yolov3 71 | python3 train.py --evolve --data data/coco_100img.data --num-workers 2 --epochs 30 72 | gsutil cp evolve.txt gs://ultralytics 73 | sudo shutdown 74 | -------------------------------------------------------------------------------- /data/get_coco2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | 10 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 11 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 12 | rm ./cookie 13 | 14 | # Unzip labels 15 | unzip -q ${filename} # for coco.zip 16 | # tar -xzf ${filename} # for coco.tar.gz 17 | rm ${filename} 18 | 19 | # Download images 20 | cd coco/images 21 | curl http://images.cocodataset.org/zips/train2014.zip -o train2014.zip 22 
| curl http://images.cocodataset.org/zips/val2014.zip -o val2014.zip 23 | 24 | # Unzip images 25 | unzip -q train2014.zip 26 | unzip -q val2014.zip 27 | 28 | # (optional) Delete zip files 29 | rm -rf *.zip 30 | 31 | # cd out 32 | cd ../.. 33 | 34 | -------------------------------------------------------------------------------- /data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download images 19 | cd coco/images 20 | curl http://images.cocodataset.org/zips/train2017.zip -o train2017.zip 21 | curl http://images.cocodataset.org/zips/val2017.zip -o val2017.zip 22 | 23 | # Unzip images 24 | unzip -q train2017.zip 25 | unzip -q val2017.zip 26 | 27 | # (optional) Delete zip files 28 | rm -rf *.zip 29 | 30 | # cd out 31 | cd ../.. 32 | 33 | -------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # /home/glenn_jocher3/coco/images/train2014/COCO_train2014_000000167126.jpg # bad image??
40 | -------------------------------------------------------------------------------- /deep_sort.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import numpy as np 7 | 8 | from collections import deque 9 | from predict import InferYOLOv3 10 | from utils.utils import xyxy2xywh 11 | from deep_sort import DeepSort 12 | from utils.utils_sort import COLORS_10, draw_bboxes 13 | 14 | ''' 15 | mot results: 16 | ------------ 17 | frame, id (starting from 1), tlwh(%.2f),1,-1,-1,-1 18 | 3,1,97.00,545.00,79.00,239.00,1,-1,-1,-1 19 | 3,2,376.24,396.64,83.44,252.43,1,-1,-1,-1 20 | 3,3,546.66,146.51,59.63,180.89,1,-1,-1,-1 21 | 3,4,1630.61,251.64,68.72,208.46,1,-1,-1,-1 22 | 3,5,1043.80,134.38,59.63,180.89,1,-1,-1,-1 23 | 3,6,792.96,148.08,55.57,168.71,1,-1,-1,-1 24 | 3,7,1732.55,448.65,73.69,223.20,1,-1,-1,-1 25 | ''' 26 | 27 | 28 | def xyxy2tlwh(x): 29 | ''' 30 | Convert xyxy boxes to (top left x, top left y, width, height) 31 | ''' 32 | y = torch.zeros_like(x) if isinstance(x, 33 | torch.Tensor) else np.zeros_like(x) 34 | y[:, 0] = x[:, 0] 35 | y[:, 1] = x[:, 1] 36 | y[:, 2] = x[:, 2] - x[:, 0] 37 | y[:, 3] = x[:, 3] - x[:, 1] 38 | return y 39 | 40 | 41 | class Detector(object): 42 | def __init__(self, args): 43 | self.args = args 44 | if args.display: 45 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 46 | cv2.resizeWindow("test", args.display_width, args.display_height) 47 | 48 | device = torch.device( 49 | 'cuda') if torch.cuda.is_available() else torch.device('cpu') 50 | 51 | self.vdo = cv2.VideoCapture() 52 | self.yolo3 = InferYOLOv3(args.yolo_cfg, 53 | args.img_size, 54 | args.yolo_weights, 55 | args.data_cfg, 56 | device, 57 | conf_thres=args.conf_thresh, 58 | nms_thres=args.nms_thresh) 59 | self.deepsort = DeepSort(args.deepsort_checkpoint) 60 | 61 | def __enter__(self): 62 | assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error" 63 | self.vdo.open(self.args.VIDEO_PATH) 64 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | 67 | if self.args.save_path: 68 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 69 | self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, 70 | (self.im_width, self.im_height)) 71 | 72 | assert self.vdo.isOpened() 73 | return self 74 | 75 | def __exit__(self, exc_type, exc_value, exc_traceback): 76 | if exc_type: 77 | print(exc_type, exc_value, exc_traceback) 78 | 79 | def detect(self, outfile=None): 80 | frame_cnt = -1 81 | 82 | if outfile is not None: 83 | f = open(outfile, 'w') 84 | 85 | print("begin....") 86 | 87 | while self.vdo.grab(): 88 | frame_cnt += 1 89 | 90 | if frame_cnt % 3 == 0:  # skip every third frame to reduce the processing load 91 | continue 92 | 93 | start = time.time() 94 | _, ori_im = self.vdo.retrieve() 95 | im = ori_im 96 | 97 | t1_begin = time.time() 98 | bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im) 99 | t1_end = time.time() 100 | 101 | t2_begin = time.time() 102 | if bbox_xxyy is not None: 103 | # select class 104 | # mask = cls_ids == 0 105 | # bbox_xxyy = bbox_xxyy[mask] 106 | 107 | # bbox_xxyy[:, 3:] *= 1.2 108 | # cls_conf = cls_conf[mask] 109 | 110 | bbox_xcycwh = xyxy2xywh(bbox_xxyy) 111 | outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im) 112 | 113 | if len(outputs) > 0: 114 | bbox_xyxy = outputs[:, :4] 115 | identities = outputs[:, -1] 116 | # draw the tracked boxes with their identities 117 | ori_im = draw_bboxes(ori_im, bbox_xyxy, identities) 118 | 119 | # frame, id, tlwh(%.2f),1,-1,-1,-1 120 | if outfile is not None: 121 | box_xywh =
xyxy2tlwh(bbox_xyxy) 122 | for i in range(len(box_xywh)): 123 | write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % ( 124 | frame_cnt + 125 | 1, outputs[i, -1], int(box_xywh[i] 126 | [0]), int(box_xywh[i][1]), 127 | int(box_xywh[i][2]), int(box_xywh[i][3])) 128 | f.write(write_line) 129 | 130 | t2_end = time.time() 131 | 132 | end = time.time() 133 | print( 134 | "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" 135 | % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin), 136 | (end - start), ((t1_end - t1_begin) * 100 / 137 | ((end - start))), (1 / (end - start)))) 138 | if self.args.display: 139 | cv2.imshow("test", ori_im) 140 | cv2.waitKey(1) 141 | 142 | if self.args.save_path: 143 | self.output.write(ori_im) 144 | 145 | if outfile is not None: 146 | f.close() 147 | 148 | 149 | def parse_args(): 150 | parser = argparse.ArgumentParser() 151 | parser.add_argument("VIDEO_PATH", type=str) 152 | parser.add_argument("--yolo_cfg", 153 | type=str, 154 | default="../YOLOv3-complete-pruning-master/cfg/dense-v3-tiny-spp.cfg" 155 | ) 156 | parser.add_argument( 157 | "--yolo_weights", 158 | type=str, 159 | default="../YOLOv3-complete-pruning-master/weights/A6/last.pt" 160 | ) 161 | parser.add_argument("--conf_thresh", type=float, default=0.5) # ori 0.5 162 | parser.add_argument("--nms_thresh", type=float, default=0.3) 163 | parser.add_argument("--deepsort_checkpoint", 164 | type=str, 165 | default="deep_sort/deep/checkpoint/mobilenetv2_x1_0_best.pt") 166 | parser.add_argument("--max_dist", type=float, default=0.2) 167 | parser.add_argument("--ignore_display", 168 | dest="display", 169 | action="store_false") 170 | parser.add_argument("--display_width", type=int, default=800) 171 | parser.add_argument("--display_height", type=int, default=600) 172 | parser.add_argument("--save_path", type=str, default="demo.avi") 173 | parser.add_argument("--data_cfg", type=str, default="data/voc_small.data") 174 | parser.add_argument("--img_size", type=int, default=416, help="img size") 175 | 176 | return parser.parse_args() 177 | 178 | 179 | if __name__ == "__main__": 180 | args = parse_args() 181 | output_file = "./data/videosample/predicts.txt" 182 | with Detector(args) as det: 183 | det.detect(output_file) 184 | 185 | os.system("ffmpeg -y -i demo.avi -r 10 -b:a 32k %s_output.mp4" % 186 | (os.path.basename(args.VIDEO_PATH).split('.')[0])) 187 | -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort -------------------------------------------------------------------------------- /deep_sort/deep/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.jpg 3 | checkpoint/ckpt.t7 4 | *.json 5 | data/videoAndLabel/cutout8.mp4 6 | -------------------------------------------------------------------------------- /deep_sort/deep/README.md: -------------------------------------------------------------------------------- 1 | # reid_for_deepsort 2 | simplest reid for https://github.com/pprp/yolov3.pytorch 3 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- 
/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/checkpoint/ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, Dataset 10 | from torchvision import datasets, transforms 11 | 12 | from train import input_size 13 | from models import build_model 14 | 15 | test_transforms = transforms.Compose([ 16 | transforms.Resize(input_size), 17 | transforms.ToTensor(), 18 | transforms.Normalize([0.3568, 0.3141, 0.2781], [0.1752, 0.1857, 0.1879]) 19 | ]) 20 | 21 | gallery_datasets = datasets.ImageFolder(os.path.join("data", "gallery"), 22 | transform=test_transforms) 23 | query_datasets = datasets.ImageFolder(os.path.join("data", "query"), 24 | transform=test_transforms) 25 | 26 | gallery_dataloader = DataLoader(gallery_datasets, 27 | batch_size=128, 28 | drop_last=False, 29 | shuffle=False, 30 | num_workers=1) 31 | 32 | query_dataloader = DataLoader(query_datasets, 33 | batch_size=128, 34 | drop_last=False, 35 | shuffle=False, 36 | num_workers=1) 37 | 38 | use_gpu = torch.cuda.is_available() 39 | 40 | class_names = gallery_datasets.classes 41 | 42 | 43 | def fliplr(img): 44 | '''flip horizontal''' 45 | inv_idx = torch.arange(img.size(3) - 1, -1, -1).long() 46 | img_flip = img.index_select(3, inv_idx) # flip along w 47 | return img_flip 48 | 49 | 50 | def extract_features(model, dataloader): 51 | features = torch.FloatTensor() 52 | count = 0 53 | for data in dataloader: 54 | img, label = data 55 | bs, c, h, w = img.size() 56 | count += bs 57 | ff = torch.FloatTensor(bs, 96).zero_() # 2048 if res50 58 | print(count, end='\r') 59 | sys.stdout.flush() 60 | # add two features 61 | for i in range(2): 62 | if i == 1: 63 | img = fliplr(img) 64 | input_img = Variable(img.cuda()) 65 | # print("=", input_img.shape) 66 | feature = model(input_img) 67 | feature = feature.data.cpu() 68 | # print(ff.shape, feature.shape) 69 | ff = ff + feature 70 | # norm features 71 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 72 | ff = ff.div(fnorm.expand_as(ff)) 73 | 74 | features = torch.cat((features, ff), 0) 75 | return features 76 | 77 | 78 | def get_label(img_path): 79 | labels = [] 80 | for path, _ in img_path: 81 | filename = os.path.basename(path) 82 | label = filename.split('_')[0] 83 | if label[0:2] == '-1': 84 | labels.append(-1) 85 | else: 86 | labels.append(label) 87 | return labels 88 | 89 | 90 | def compute_mAP(index, good_index, junk_index): 91 | ap = 0 92 | cmc = torch.IntTensor(len(index)).zero_() #len = 20 得到前20个 93 | if good_index.size == 0: 94 | cmc[0] = -1 95 | return ap, cmc 96 | 97 | # remove junk index 98 | mask = np.in1d(index, junk_index, invert=True) 99 | index = index[mask] 100 | 101 | # find good index 102 | ngood = len(good_index) 103 | mask = 
np.in1d(index, good_index) 104 | rows_good = np.argwhere(mask == True) 105 | rows_good = rows_good.flatten() 106 | 107 | cmc[rows_good[0]:] = 1 108 | for i in range(ngood): 109 | d_recall = 1.0 / ngood 110 | precision = (i + 1) * 1.0 / (rows_good[i] + 1) 111 | if rows_good[i] != 0: 112 | old_precision = i * 1.0 / rows_good[i] 113 | else: 114 | old_precision = 1.0 115 | ap = ap + d_recall * (old_precision + precision) / 2 116 | return ap, cmc 117 | 118 | 119 | def evaluate(qf, ql, gf, gl): 120 | query = qf.view(-1, 1) # query 是一张图 121 | score = torch.mm(gf, query) # 计算得分[1, num] 122 | score = score.squeeze(1).cpu() 123 | score = score.numpy() 124 | #predict index 125 | index = np.argsort(score) 126 | index = index[::-1] # index 倒过来 127 | # 得到前20个 128 | # index = index[0:20] 129 | 130 | # good index , label一致 131 | good_index = np.argwhere(gl == ql) 132 | # print("good_index", gl, '\n', ql, gl == ql, type(gl)) 133 | junk_index = np.argwhere(gl == "bg") 134 | 135 | CMC = compute_mAP(index, good_index, junk_index) 136 | return CMC 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = argparse.ArgumentParser('help') 141 | parser.add_argument('--weight_path', 142 | type=str, 143 | default="./checkpints/last.pt") 144 | parser.add_argument("--model", type=str, default="mudeep") 145 | args = parser.parse_args() 146 | 147 | model = build_model(name=args.model, num_classes=len(class_names)) 148 | assert os.path.isfile( 149 | "./checkpoint/%s/%s_last.pt" % 150 | (args.model, args.model)), "Error: no checkpoint file found!" 151 | print('Loading from checkpoint/last.pt') 152 | checkpoint = torch.load("./checkpoint/%s/%s_last.pt" % 153 | (args.model, args.model)) 154 | net_dict = checkpoint['net_dict'] 155 | model.load_state_dict(net_dict) 156 | 157 | model.eval() 158 | if use_gpu: 159 | model = model.cuda() 160 | 161 | gallery_features = extract_features(model, gallery_dataloader) 162 | query_features = extract_features(model, query_dataloader) 163 | 164 | gallery_label = np.array(get_label(gallery_datasets.imgs)) 165 | query_label = np.array(get_label(query_datasets.imgs)) 166 | 167 | if use_gpu: 168 | gallery_features = gallery_features.cuda() 169 | query_features = query_features.cuda() 170 | 171 | CMC = torch.IntTensor(len(gallery_label)).zero_() 172 | ap = 0.0 173 | for i in range(len(query_label)): 174 | ap_tmp, CMC_tmp = evaluate(query_features[i], query_label[i], 175 | gallery_features, gallery_label) 176 | if CMC_tmp[0] == -1: 177 | continue 178 | CMC = CMC + CMC_tmp 179 | # print(i, ":",ap_tmp) 180 | ap += ap_tmp 181 | 182 | CMC = CMC.float() 183 | CMC = CMC / len(query_label) 184 | 185 | print("\tRank@1:%f\n\tRank@5:%f\n\tRank@10:%f\n\tmAP:%f" % 186 | (CMC[0], CMC[4], CMC[9], ap / len(query_label))) 187 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .models import build_model 7 | # from .train import input_size 8 | 9 | 10 | class Extractor(object): 11 | def __init__(self, model_name, model_path, use_cuda=True): 12 | self.net = build_model(name=model_name, 13 | num_classes=96) #osnet_small(96, reid=True) 14 | self.device = "cuda" if torch.cuda.is_available( 15 | ) and use_cuda else "cpu" 16 | state_dict = torch.load(model_path)['net_dict'] 17 | self.net.load_state_dict(state_dict) 18 | print("Loading weights from {}... 
Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (128,128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.3568, 0.3141, 0.2781], 24 | [0.1752, 0.1857, 0.1879]) 25 | ]) 26 | 27 | def _preprocess(self, im_crops): 28 | """ 29 | TODO: 30 | 1. to float with scale from 0 to 1 31 | 2. resize to (64, 128) as Market1501 dataset did 32 | 3. concatenate to a numpy array 33 | 3. to torch Tensor 34 | 4. normalize 35 | """ 36 | def _resize(im, size): 37 | return cv2.resize(im.astype(np.float32) / 255., size) 38 | 39 | im_batch = torch.cat([ 40 | self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops 41 | ], 42 | dim=0).float() 43 | return im_batch 44 | 45 | def __call__(self, im_crops): 46 | im_batch = self._preprocess(im_crops) 47 | with torch.no_grad(): 48 | im_batch = im_batch.to(self.device) 49 | features = self.net(im_batch) 50 | return features.cpu().numpy() 51 | 52 | 53 | if __name__ == '__main__': 54 | img = cv2.imread("data/reid/cutout13_0/cutout13_0_0.jpg")[:, :, (2, 1, 0)] 55 | extr = Extractor("mudeep","checkpoint/best.pt") 56 | feature = extr([img, img]) 57 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(c_in, 12 | c_out, 13 | 3, 14 | stride=2, 15 | padding=1, 16 | bias=False) 17 | else: 18 | self.conv1 = nn.Conv2d(c_in, 19 | c_out, 20 | 3, 21 | stride=1, 22 | padding=1, 23 | bias=False) 24 | self.bn1 = nn.BatchNorm2d(c_out) 25 | self.relu = nn.ReLU(True) 26 | self.conv2 = nn.Conv2d(c_out, 27 | c_out, 28 | 3, 29 | stride=1, 30 | padding=1, 31 | bias=False) 32 | self.bn2 = nn.BatchNorm2d(c_out) 33 | if is_downsample: 34 | self.downsample = nn.Sequential( 35 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 36 | nn.BatchNorm2d(c_out)) 37 | elif c_in != c_out: 38 | self.downsample = nn.Sequential( 39 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 40 | nn.BatchNorm2d(c_out)) 41 | self.is_downsample = True 42 | 43 | def forward(self, x): 44 | y = self.conv1(x) 45 | y = self.bn1(y) 46 | y = self.relu(y) 47 | y = self.conv2(y) 48 | y = self.bn2(y) 49 | if self.is_downsample: 50 | x = self.downsample(x) 51 | return F.relu(x.add(y), True) 52 | 53 | 54 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 55 | blocks = [] 56 | for i in range(repeat_times): 57 | if i == 0: 58 | blocks += [ 59 | BasicBlock(c_in, c_out, is_downsample=is_downsample), 60 | ] 61 | else: 62 | blocks += [ 63 | BasicBlock(c_out, c_out), 64 | ] 65 | return nn.Sequential(*blocks) 66 | 67 | 68 | class Net(nn.Module): 69 | def __init__(self, num_classes=751, reid=False): 70 | super(Net, self).__init__() 71 | # 3 128 64 72 | self.conv = nn.Sequential( 73 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 74 | nn.BatchNorm2d(64), 75 | nn.ReLU(inplace=True), 76 | # nn.Conv2d(32,32,3,stride=1,padding=1), 77 | # nn.BatchNorm2d(32), 78 | # nn.ReLU(inplace=True), 79 | nn.MaxPool2d(3, 2, padding=1), 80 | ) 81 | # 32 64 32 82 | self.layer1 = make_layers(64, 64, 2, False) 83 | # 32 64 32 84 | self.layer2 = make_layers(64, 128, 2, True) 85 | # 64 32 16 86 | self.layer3 = make_layers(128, 256, 2, True) 87 | 
# 128 16 8 88 | self.layer4 = make_layers(256, 512, 2, True) 89 | # 256 8 4 90 | self.avgpool = nn.AvgPool2d((8, 4), 1) 91 | # 256 1 1 92 | self.reid = reid 93 | self.classifier = nn.Sequential( 94 | nn.Linear(512, 256), 95 | nn.BatchNorm1d(256), 96 | nn.ReLU(inplace=True), 97 | nn.Dropout(), 98 | nn.Linear(256, num_classes), 99 | ) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) 103 | x = self.layer1(x) 104 | x = self.layer2(x) 105 | x = self.layer3(x) 106 | x = self.layer4(x) 107 | x = self.avgpool(x) 108 | x = x.view(x.size(0), -1) 109 | # B x 128 110 | if self.reid: 111 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 112 | return x 113 | # classifier 114 | x = self.classifier(x) 115 | return x 116 | 117 | 118 | if __name__ == '__main__': 119 | net = Net() 120 | x = torch.randn(4, 3, 128, 64) 121 | y = net(x) 122 | import ipdb 123 | ipdb.set_trace() 124 | -------------------------------------------------------------------------------- /deep_sort/deep/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | 4 | from .pcb import * 5 | from .mlfn import * 6 | from .hacnn import * 7 | from .osnet import * 8 | from .senet import * 9 | from .mudeep import * 10 | from .nasnet import * 11 | from .resnet import * 12 | from .densenet import * 13 | from .xception import * 14 | from .osnet_ain import * 15 | from .resnetmid import * 16 | from .shufflenet import * 17 | from .squeezenet import * 18 | from .inceptionv4 import * 19 | from .mobilenetv2 import * 20 | from .resnet_ibn_a import * 21 | from .resnet_ibn_b import * 22 | from .shufflenetv2 import * 23 | from .inceptionresnetv2 import * 24 | 25 | __model_factory = { 26 | # image classification models 27 | 'resnet18': resnet18, 28 | 'resnet34': resnet34, 29 | 'resnet50': resnet50, 30 | 'resnet101': resnet101, 31 | 'resnet152': resnet152, 32 | 'resnext50_32x4d': resnext50_32x4d, 33 | 'resnext101_32x8d': resnext101_32x8d, 34 | 'resnet50_fc512': resnet50_fc512, 35 | 'se_resnet50': se_resnet50, 36 | 'se_resnet50_fc512': se_resnet50_fc512, 37 | 'se_resnet101': se_resnet101, 38 | 'se_resnext50_32x4d': se_resnext50_32x4d, 39 | 'se_resnext101_32x4d': se_resnext101_32x4d, 40 | 'densenet121': densenet121, 41 | 'densenet169': densenet169, 42 | 'densenet201': densenet201, 43 | 'densenet161': densenet161, 44 | 'densenet121_fc512': densenet121_fc512, 45 | 'inceptionresnetv2': inceptionresnetv2, 46 | 'inceptionv4': inceptionv4, 47 | 'xception': xception, 48 | 'resnet50_ibn_a': resnet50_ibn_a, 49 | 'resnet50_ibn_b': resnet50_ibn_b, 50 | # lightweight models 51 | 'nasnsetmobile': nasnetamobile, 52 | 'mobilenetv2_x1_0': mobilenetv2_x1_0, 53 | 'mobilenetv2_x1_4': mobilenetv2_x1_4, 54 | 'shufflenet': shufflenet, 55 | 'squeezenet1_0': squeezenet1_0, 56 | 'squeezenet1_0_fc512': squeezenet1_0_fc512, 57 | 'squeezenet1_1': squeezenet1_1, 58 | 'shufflenet_v2_x0_5': shufflenet_v2_x0_5, 59 | 'shufflenet_v2_x1_0': shufflenet_v2_x1_0, 60 | 'shufflenet_v2_x1_5': shufflenet_v2_x1_5, 61 | 'shufflenet_v2_x2_0': shufflenet_v2_x2_0, 62 | # reid-specific models 63 | 'mudeep': MuDeep, 64 | 'resnet50mid': resnet50mid, 65 | 'hacnn': HACNN, 66 | 'pcb_p6': pcb_p6, 67 | 'pcb_p4': pcb_p4, 68 | 'mlfn': mlfn, 69 | 'osnet_x1_0': osnet_x1_0, 70 | 'osnet_x0_75': osnet_x0_75, 71 | 'osnet_x0_5': osnet_x0_5, 72 | 'osnet_x0_25': osnet_x0_25, 73 | 'osnet_ibn_x1_0': osnet_ibn_x1_0, 74 | 'osnet_ain_x1_0': osnet_ain_x1_0 75 | } 76 | 77 | 78 | def show_avai_models(): 79 | """Displays 
available models. 80 | 81 | Examples:: 82 | >>> from torchreid import models 83 | >>> models.show_avai_models() 84 | """ 85 | print(list(__model_factory.keys())) 86 | 87 | 88 | def build_model(name, 89 | num_classes, 90 | loss='softmax', 91 | pretrained=True, 92 | use_gpu=True): 93 | """A function wrapper for building a model. 94 | 95 | Args: 96 | name (str): model name. 97 | num_classes (int): number of training identities. 98 | loss (str, optional): loss function to optimize the model. Currently 99 | supports "softmax" and "triplet". Default is "softmax". 100 | pretrained (bool, optional): whether to load ImageNet-pretrained weights. 101 | Default is True. 102 | use_gpu (bool, optional): whether to use gpu. Default is True. 103 | 104 | Returns: 105 | nn.Module 106 | 107 | Examples:: 108 | >>> from torchreid import models 109 | >>> model = models.build_model('resnet50', 751, loss='softmax') 110 | """ 111 | avai_models = list(__model_factory.keys()) 112 | if name not in avai_models: 113 | raise KeyError('Unknown model: {}. Must be one of {}'.format( 114 | name, avai_models)) 115 | return __model_factory[name](num_classes=num_classes, 116 | loss=loss, 117 | pretrained=pretrained, 118 | use_gpu=use_gpu) 119 | -------------------------------------------------------------------------------- /deep_sort/deep/models/mudeep.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | __all__ = ['MuDeep'] 7 | 8 | 9 | class ConvBlock(nn.Module): 10 | """Basic convolutional block. 11 | 12 | convolution + batch normalization + relu. 13 | 14 | Args: 15 | in_c (int): number of input channels. 16 | out_c (int): number of output channels. 17 | k (int or tuple): kernel size. 18 | s (int or tuple): stride. 19 | p (int or tuple): padding. 
20 | """ 21 | 22 | def __init__(self, in_c, out_c, k, s, p): 23 | super(ConvBlock, self).__init__() 24 | self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p) 25 | self.bn = nn.BatchNorm2d(out_c) 26 | 27 | def forward(self, x): 28 | return F.relu(self.bn(self.conv(x))) 29 | 30 | 31 | class ConvLayers(nn.Module): 32 | """Preprocessing layers.""" 33 | 34 | def __init__(self): 35 | super(ConvLayers, self).__init__() 36 | self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1) 37 | self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1) 38 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 39 | 40 | def forward(self, x): 41 | x = self.conv1(x) 42 | x = self.conv2(x) 43 | x = self.maxpool(x) 44 | return x 45 | 46 | 47 | class MultiScaleA(nn.Module): 48 | """Multi-scale stream layer A (Sec.3.1)""" 49 | 50 | def __init__(self): 51 | super(MultiScaleA, self).__init__() 52 | self.stream1 = nn.Sequential( 53 | ConvBlock(96, 96, k=1, s=1, p=0), 54 | ConvBlock(96, 24, k=3, s=1, p=1), 55 | ) 56 | self.stream2 = nn.Sequential( 57 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 58 | ConvBlock(96, 24, k=1, s=1, p=0), 59 | ) 60 | self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0) 61 | self.stream4 = nn.Sequential( 62 | ConvBlock(96, 16, k=1, s=1, p=0), 63 | ConvBlock(16, 24, k=3, s=1, p=1), 64 | ConvBlock(24, 24, k=3, s=1, p=1), 65 | ) 66 | 67 | def forward(self, x): 68 | s1 = self.stream1(x) 69 | s2 = self.stream2(x) 70 | s3 = self.stream3(x) 71 | s4 = self.stream4(x) 72 | y = torch.cat([s1, s2, s3, s4], dim=1) 73 | return y 74 | 75 | 76 | class Reduction(nn.Module): 77 | """Reduction layer (Sec.3.1)""" 78 | 79 | def __init__(self): 80 | super(Reduction, self).__init__() 81 | self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 82 | self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1) 83 | self.stream3 = nn.Sequential( 84 | ConvBlock(96, 48, k=1, s=1, p=0), 85 | ConvBlock(48, 56, k=3, s=1, p=1), 86 | ConvBlock(56, 64, k=3, s=2, p=1), 87 | ) 88 | 89 | def forward(self, x): 90 | s1 = self.stream1(x) 91 | s2 = self.stream2(x) 92 | s3 = self.stream3(x) 93 | y = torch.cat([s1, s2, s3], dim=1) 94 | return y 95 | 96 | 97 | class MultiScaleB(nn.Module): 98 | """Multi-scale stream layer B (Sec.3.1)""" 99 | 100 | def __init__(self): 101 | super(MultiScaleB, self).__init__() 102 | self.stream1 = nn.Sequential( 103 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 104 | ConvBlock(256, 256, k=1, s=1, p=0), 105 | ) 106 | self.stream2 = nn.Sequential( 107 | ConvBlock(256, 64, k=1, s=1, p=0), 108 | ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)), 109 | ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), 110 | ) 111 | self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0) 112 | self.stream4 = nn.Sequential( 113 | ConvBlock(256, 64, k=1, s=1, p=0), 114 | ConvBlock(64, 64, k=(1, 3), s=1, p=(0, 1)), 115 | ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)), 116 | ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)), 117 | ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), 118 | ) 119 | 120 | def forward(self, x): 121 | s1 = self.stream1(x) 122 | s2 = self.stream2(x) 123 | s3 = self.stream3(x) 124 | s4 = self.stream4(x) 125 | return s1, s2, s3, s4 126 | 127 | 128 | class Fusion(nn.Module): 129 | """Saliency-based learning fusion layer (Sec.3.2)""" 130 | 131 | def __init__(self): 132 | super(Fusion, self).__init__() 133 | self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1)) 134 | self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1)) 135 | self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1)) 136 | self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1)) 137 | 138 | # We add 
an average pooling layer to reduce the spatial dimension 139 | # of feature maps, which differs from the original paper. 140 | self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0) 141 | 142 | def forward(self, x1, x2, x3, x4): 143 | s1 = self.a1.expand_as(x1) * x1 144 | s2 = self.a2.expand_as(x2) * x2 145 | s3 = self.a3.expand_as(x3) * x3 146 | s4 = self.a4.expand_as(x4) * x4 147 | y = self.avgpool(s1 + s2 + s3 + s4) 148 | return y 149 | 150 | 151 | class MuDeep(nn.Module): 152 | """Multiscale deep neural network. 153 | 154 | Reference: 155 | Qian et al. Multi-scale Deep Learning Architectures 156 | for Person Re-identification. ICCV 2017. 157 | 158 | Public keys: 159 | - ``mudeep``: Multiscale deep neural network. 160 | """ 161 | 162 | def __init__(self, num_classes, loss='softmax', **kwargs): 163 | super(MuDeep, self).__init__() 164 | self.loss = loss 165 | 166 | self.block1 = ConvLayers() 167 | self.block2 = MultiScaleA() 168 | self.block3 = Reduction() 169 | self.block4 = MultiScaleB() 170 | self.block5 = Fusion() 171 | 172 | # Due to this fully connected layer, input image has to be fixed 173 | # in shape, i.e. (3, 256, 128), such that the last convolutional feature 174 | # maps are of shape (256, 16, 8). If input shape is changed, 175 | # the input dimension of this layer has to be changed accordingly. 176 | self.fc = nn.Sequential( 177 | nn.Linear(256 * 8 * 8, 4096), 178 | nn.BatchNorm1d(4096), 179 | nn.ReLU(), 180 | ) 181 | self.classifier = nn.Linear(4096, num_classes) 182 | self.feat_dim = 4096 183 | 184 | def featuremaps(self, x): 185 | x = self.block1(x) 186 | x = self.block2(x) 187 | x = self.block3(x) 188 | x = self.block4(x) 189 | x = self.block5(*x) 190 | return x 191 | 192 | def forward(self, x, return_featuremaps=False): 193 | x = self.featuremaps(x) 194 | if return_featuremaps: 195 | return x 196 | x = x.view(x.size(0), -1) 197 | x = self.fc(x) 198 | y = self.classifier(x) 199 | 200 | if self.loss == 'softmax': 201 | return y 202 | elif self.loss == 'triplet': 203 | return y, x 204 | else: 205 | raise KeyError('Unsupported loss: {}'.format(self.loss)) 206 | -------------------------------------------------------------------------------- /deep_sort/deep/models/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def 
make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | 68 | self.gap = nn.AdaptiveAvgPool2d(1) 69 | 70 | # 128 16 8 71 | self.dense = nn.Sequential( 72 | nn.Dropout(p=0.5), 73 | nn.Linear(128, 128), 74 | nn.BatchNorm1d(128), 75 | nn.ELU(inplace=True) 76 | ) 77 | # 256 1 1 78 | self.reid = reid 79 | self.batch_norm = nn.BatchNorm1d(128) 80 | self.classifier = nn.Sequential( 81 | nn.Linear(128, num_classes), 82 | ) 83 | 84 | def forward(self, x): 85 | bs = x.shape[0] 86 | x = self.conv(x) 87 | x = self.layer1(x) 88 | x = self.layer2(x) 89 | x = self.layer3(x) 90 | x = self.gap(x).view(bs, -1) 91 | 92 | if self.reid: 93 | x = self.dense[0](x) 94 | x = self.dense[1](x) 95 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 96 | return x 97 | 98 | x = self.dense(x) 99 | # B x 128 100 | # classifier 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | if __name__ == '__main__': 106 | net = Net(reid=True) 107 | x = torch.randn(4,3,128,64) 108 | y = net(x) 109 | import ipdb; ipdb.set_trace() 110 | 111 | 112 | -------------------------------------------------------------------------------- /deep_sort/deep/models/shufflenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | import torch 3 | import torch.utils.model_zoo as model_zoo 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | __all__ = ['shufflenet'] 8 | 9 | model_urls = { 10 | # training epoch = 90, top1 = 61.8 11 | 'imagenet': 12 | 'https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM', 13 | } 14 | 15 | 16 | class ChannelShuffle(nn.Module): 17 | 18 | def __init__(self, num_groups): 19 | super(ChannelShuffle, self).__init__() 20 | self.g = num_groups 21 | 22 | def forward(self, x): 23 | b, c, h, w = x.size() 24 | n = c // self.g 25 | # reshape 26 | x = x.view(b, self.g, n, h, w) 27 | # transpose 28 | x = x.permute(0, 2, 1, 3, 4).contiguous() 29 | # flatten 30 | x = x.view(b, c, h, w) 31 | return x 32 | 33 | 34 | class Bottleneck(nn.Module): 35 | 36 | def __init__( 37 | self, 38 | in_channels, 39 | out_channels, 40 | stride, 41 | num_groups, 42 | group_conv1x1=True 43 | ): 44 | super(Bottleneck, self).__init__() 45 | assert stride in [1, 2], 'Warning: stride must be either 1 or 2' 46 | self.stride = stride 47 | mid_channels = out_channels // 4 48 | if stride == 2: out_channels -= in_channels 49 | # group conv is not applied to first conv1x1 at stage 2 50 | num_groups_conv1x1 = num_groups if group_conv1x1 else 1 51 | self.conv1 = nn.Conv2d( 52 | in_channels, 53 | mid_channels, 54 | 1, 55 | groups=num_groups_conv1x1, 56 | bias=False 57 | ) 58 | self.bn1 = nn.BatchNorm2d(mid_channels) 59 | self.shuffle1 = 
ChannelShuffle(num_groups) 60 | self.conv2 = nn.Conv2d( 61 | mid_channels, 62 | mid_channels, 63 | 3, 64 | stride=stride, 65 | padding=1, 66 | groups=mid_channels, 67 | bias=False 68 | ) 69 | self.bn2 = nn.BatchNorm2d(mid_channels) 70 | self.conv3 = nn.Conv2d( 71 | mid_channels, out_channels, 1, groups=num_groups, bias=False 72 | ) 73 | self.bn3 = nn.BatchNorm2d(out_channels) 74 | if stride == 2: self.shortcut = nn.AvgPool2d(3, stride=2, padding=1) 75 | 76 | def forward(self, x): 77 | out = F.relu(self.bn1(self.conv1(x))) 78 | out = self.shuffle1(out) 79 | out = self.bn2(self.conv2(out)) 80 | out = self.bn3(self.conv3(out)) 81 | if self.stride == 2: 82 | res = self.shortcut(x) 83 | out = F.relu(torch.cat([res, out], 1)) 84 | else: 85 | out = F.relu(x + out) 86 | return out 87 | 88 | 89 | # configuration of (num_groups: #out_channels) based on Table 1 in the paper 90 | cfg = { 91 | 1: [144, 288, 576], 92 | 2: [200, 400, 800], 93 | 3: [240, 480, 960], 94 | 4: [272, 544, 1088], 95 | 8: [384, 768, 1536], 96 | } 97 | 98 | 99 | class ShuffleNet(nn.Module): 100 | """ShuffleNet. 101 | 102 | Reference: 103 | Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural 104 | Network for Mobile Devices. CVPR 2018. 105 | 106 | Public keys: 107 | - ``shufflenet``: ShuffleNet (groups=3). 108 | """ 109 | 110 | def __init__(self, num_classes, loss='softmax', num_groups=3, **kwargs): 111 | super(ShuffleNet, self).__init__() 112 | self.loss = loss 113 | 114 | self.conv1 = nn.Sequential( 115 | nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False), 116 | nn.BatchNorm2d(24), 117 | nn.ReLU(), 118 | nn.MaxPool2d(3, stride=2, padding=1), 119 | ) 120 | 121 | self.stage2 = nn.Sequential( 122 | Bottleneck( 123 | 24, cfg[num_groups][0], 2, num_groups, group_conv1x1=False 124 | ), 125 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 126 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 127 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 128 | ) 129 | 130 | self.stage3 = nn.Sequential( 131 | Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups), 132 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 133 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 134 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 135 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 136 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 137 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 138 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 139 | ) 140 | 141 | self.stage4 = nn.Sequential( 142 | Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups), 143 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 144 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 145 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 146 | ) 147 | 148 | self.classifier = nn.Linear(cfg[num_groups][2], num_classes) 149 | self.feat_dim = cfg[num_groups][2] 150 | 151 | def forward(self, x): 152 | x = self.conv1(x) 153 | x = self.stage2(x) 154 | x = self.stage3(x) 155 | x = self.stage4(x) 156 | x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1) 157 | 158 | if not self.training: 159 | return x 160 | 161 | y = self.classifier(x) 162 | 163 | if self.loss == 'softmax': 164 | return y 165 | elif self.loss == 'triplet': 166 | return y, x 167 | else: 168 | raise KeyError('Unsupported loss: 
{}'.format(self.loss)) 169 | 170 | 171 | def init_pretrained_weights(model, model_url): 172 | """Initializes model with pretrained weights. 173 | 174 | Layers that don't match with pretrained layers in name or size are kept unchanged. 175 | """ 176 | pretrain_dict = model_zoo.load_url(model_url) 177 | model_dict = model.state_dict() 178 | pretrain_dict = { 179 | k: v 180 | for k, v in pretrain_dict.items() 181 | if k in model_dict and model_dict[k].size() == v.size() 182 | } 183 | model_dict.update(pretrain_dict) 184 | model.load_state_dict(model_dict) 185 | 186 | 187 | def shufflenet(num_classes, loss='softmax', pretrained=True, **kwargs): 188 | model = ShuffleNet(num_classes, loss, **kwargs) 189 | if pretrained: 190 | #init_pretrained_weights(model, model_urls['imagenet']) 191 | import warnings 192 | warnings.warn( 193 | 'The imagenet pretrained weights need to be manually downloaded from {}' 194 | .format(model_urls['imagenet']) 195 | ) 196 | return model 197 | -------------------------------------------------------------------------------- /deep_sort/deep/oldfeature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .model import Net 7 | 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available( 13 | ) and use_cuda else "cpu" 14 | state_dict = torch.load( 15 | model_path, map_location=lambda storage, loc: storage)['net_dict'] 16 | 17 | self.net.load_state_dict(state_dict) 18 | print("Loading weights from {}... Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (64, 128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 24 | ]) 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32) / 255., size) 37 | 38 | im_batch = torch.cat([ 39 | self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops 40 | ], 41 | dim=0).float() 42 | return im_batch 43 | 44 | def __call__(self, im_crops): 45 | im_batch = self._preprocess(im_crops) 46 | with torch.no_grad(): 47 | im_batch = im_batch.to(self.device) 48 | features = self.net(im_batch) 49 | return features.cpu().numpy() 50 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/assign_train_val.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | from os.path import join 5 | import random 6 | 7 | root_dir = "./data/reid" 8 | train_dir = "./data/train2" 9 | val_dir = "./data/val2" 10 | 11 | train_percent = 0.6 12 | val_percent = 0.4 13 | 14 | 15 | def mkdir_if_not_exist(dir): 16 | if not os.path.exists(dir): 17 | os.makedirs(dir) 18 | else: 19 | print("%s exists." 
% dir) 20 | 21 | 22 | class_full_path = glob.glob(join(root_dir, "*")) 23 | 24 | for i in range(len(class_full_path)): 25 | class_name = os.path.basename(class_full_path[i]) 26 | 27 | train_new_dir = join(train_dir, class_name) 28 | val_new_dir = join(val_dir, class_name) 29 | 30 | mkdir_if_not_exist(train_new_dir) 31 | mkdir_if_not_exist(val_new_dir) 32 | 33 | all_class_files = glob.glob(join(class_full_path[i], "*.jpg")) 34 | 35 | train_class_files = random.sample( 36 | all_class_files, int(len(all_class_files) * train_percent)) 37 | 38 | for file_path in all_class_files: 39 | print("processing %s." % (file_path)) 40 | if file_path in train_class_files: 41 | # assign to train folder 42 | shutil.copy(file_path, join(train_new_dir, os.path.basename(file_path))) 43 | else: 44 | # assign to val folder 45 | shutil.copy(file_path, join(val_new_dir, os.path.basename(file_path))) 46 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/center_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | nn.AdaptiveAvgPool2d 4 | class CenterLoss(nn.Module): 5 | """Center loss. 6 | 7 | Reference: 8 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 9 | 10 | Args: 11 | num_classes (int): number of classes. 12 | feat_dim (int): feature dimension. 13 | """ 14 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True): 15 | super(CenterLoss, self).__init__() 16 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 17 | self.num_classes = num_classes 18 | self.feat_dim = feat_dim 19 | self.use_gpu = use_gpu 20 | 21 | if self.use_gpu: 22 | self.centers = nn.Parameter( 23 | torch.randn(self.num_classes, self.feat_dim).to(self.device)) 24 | else: 25 | self.centers = nn.Parameter( 26 | torch.randn(self.num_classes, self.feat_dim)) 27 | 28 | def forward(self, x, labels): 29 | """ 30 | Args: 31 | x: feature matrix with shape (batch_size, feat_dim). 32 | labels: ground truth labels with shape (batch_size). 33 | """ 34 | batch_size = x.size(0) 35 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 36 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand( 37 | self.num_classes, batch_size).t() 38 | distmat.addmm_(1, -2, x, self.centers.t()) 39 | 40 | classes = torch.arange(self.num_classes).long() 41 | if self.use_gpu: 42 | classes = classes.to(self.device) 43 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 44 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 45 | 46 | dist = distmat * mask.float() 47 | loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size 48 | 49 | return loss -------------------------------------------------------------------------------- /deep_sort/deep/utils/compute_mean_std.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compute channel-wise mean and standard deviation of a dataset. 3 | 4 | Usage: 5 | $ python compute_mean_std.py DATASET_ROOT DATASET_KEY 6 | 7 | - The first argument points to the root path where you put the datasets. 8 | - The second argument means the specific dataset key. 
9 | 10 | For instance, your datasets are put under $DATA and you wanna 11 | compute the statistics of Market1501, do 12 | $ python compute_mean_std.py $DATA market1501 13 | """ 14 | import argparse 15 | import torch 16 | import torchvision 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--root', type=str) 22 | parser.add_argument('--sources', type=str) 23 | args = parser.parse_args() 24 | 25 | train_loader = torch.utils.data.DataLoader( 26 | torchvision.datasets.ImageFolder("data/train", 27 | transform=torchvision.transforms.ToTensor()), 28 | batch_size=6) 29 | 30 | print('Computing mean and std ...') 31 | mean = 0. 32 | std = 0. 33 | n_samples = 0. 34 | for data, label in train_loader: 35 | batch_size = data.size(0) 36 | data = data.view(batch_size, data.size(1), -1) 37 | mean += data.mean(2).sum(0) 38 | std += data.std(2).sum(0) 39 | n_samples += batch_size 40 | 41 | mean /= n_samples 42 | std /= n_samples 43 | print('Mean: {}'.format(mean)) 44 | print('Std: {}'.format(std)) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/rename_all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import os.path as osp 4 | 5 | root_dir = r'C:\Users\pprp\Desktop\face\head_test' 6 | 7 | for i in os.listdir(root_dir): 8 | new_dir = osp.join(root_dir, i) 9 | for j in os.listdir(new_dir): 10 | jpg = osp.join(new_dir, j) 11 | name, frame, head = j.split("_") 12 | frame_no = int(frame) 13 | extend_no = '%04d' % frame_no 14 | newName = 'head_' + name + "_" + '%s.jpg' % (str(extend_no)) 15 | print('from %s to %s' % (j, newName)) 16 | os.rename(jpg, os.path.join(new_dir, newName)) -------------------------------------------------------------------------------- /deep_sort/deep/utils/tsne_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from matplotlib import offsetbox 7 | from sklearn import (manifold, datasets, decomposition, ensemble, 8 | discriminant_analysis, random_projection, neighbors) 9 | 10 | 11 | class tSNE_Visual(): 12 | def __init__(self): 13 | super(tSNE_Visual, self).__init__() 14 | self.parser = argparse.ArgumentParser() 15 | self.parser.add_argument('--Input', 16 | type=str, 17 | default='data/reid', 18 | help='the path of target dataset') 19 | self.parser.add_argument('--Size', 20 | type=int, 21 | default=100, 22 | help='the size of every class') 23 | self.parser.add_argument('--Zoom', 24 | type=float, 25 | default=0.1, 26 | help='the size of every class') 27 | self.parser.add_argument('--Output', 28 | type=str, 29 | default='t-SNE1.png', 30 | help='the out path of result image') 31 | 32 | def parse(self): 33 | self.opt = self.parser.parse_args() 34 | args = vars(self.opt) 35 | print('\n--- load options ---') 36 | for name, value in sorted(args.items()): 37 | print('%s: %s' % (str(name), str(value))) 38 | return self.opt 39 | 40 | def plot_embedding(self, X, _output, zoom, title=None): 41 | x_min, x_max = np.min(X, 0), np.max(X, 0) 42 | X = (X - x_min) / (x_max - x_min) 43 | 44 | plt.figure(figsize=(20, 20)) 45 | ax = plt.subplot(111) 46 | 47 | if hasattr(offsetbox, 'AnnotationBbox'): 48 | # only print thumbnails with matplotlib > 1.0 49 | shown_images = np.array([[1., 1.]]) # just something big 50 | for i in 
range(X.shape[0]): 51 | dist = np.sum((X[i] - shown_images)**2, 1) 52 | #if np.min(dist) < 4e-3: 53 | # don't show points that are too close 54 | # continue 55 | shown_images = np.r_[shown_images, [X[i]]] 56 | imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage( 57 | real_imgs[i], zoom=0.12, cmap=plt.cm.gray_r), 58 | X[i], 59 | pad=0) 60 | ax.add_artist(imagebox) 61 | '''for i in range(X.shape[0]): 62 | #cls = plt.text(X[i, 0], X[i, 1], _classes[y[i][0].astype(int)-1], 63 | cls = plt.text(X[i, 0], X[i, 1], str(y[i].astype(int)), 64 | #cls = plt.text(X[i, 0], X[i, 1], '★', 65 | color=_colors[int(y[i][0]-1)], 66 | fontdict={'weight': 'bold', 'size': 12}) 67 | cls.set_zorder(20) ''' 68 | 69 | ax.spines['top'].set_visible(False) 70 | ax.spines['right'].set_visible(False) 71 | ax.spines['bottom'].set_visible(False) 72 | ax.spines['left'].set_visible(False) 73 | plt.xticks([]), plt.yticks([]) 74 | if title is not None: 75 | plt.title(title) 76 | plt.savefig(_output) 77 | 78 | 79 | if __name__ == '__main__': 80 | # Disable the GUI matplotlib 81 | plt.switch_backend('agg') 82 | 83 | tsne_visual = tSNE_Visual() 84 | opts = tsne_visual.parse() 85 | dataroot = opts.Input 86 | _size = opts.Size 87 | _output = opts.Output 88 | _zoom = opts.Zoom 89 | 90 | dirs = [] 91 | for item in os.listdir(dataroot): 92 | if ('.ipynb_checkpoints' not in item): 93 | dirs.append(item) 94 | 95 | _len = len(dirs) 96 | y = np.zeros((_size * _len, 1)) 97 | for i in range(_len): 98 | y[i * _size:(i + 1) * _size] = i + 1 99 | 100 | imgs = [] 101 | real_imgs = [] 102 | for i in range(_len): 103 | single_cls = [] 104 | path = os.path.join(dataroot, dirs[i]) 105 | dataset_list = os.listdir(path) 106 | cnt = 0 107 | for item in dataset_list: 108 | if (cnt == _size): 109 | break 110 | if ('.ipynb_checkpoints' in item): 111 | continue 112 | data_path = os.path.join(path, item) 113 | temp = cv2.imread(data_path) 114 | real_img = cv2.cvtColor(temp, cv2.COLOR_BGR2RGB) 115 | imgs.append(temp.reshape(-1)) 116 | real_imgs.append(real_img) 117 | cnt = cnt + 1 118 | np_imgs = np.array(imgs) 119 | real_imgs = np.array(real_imgs) 120 | 121 | tsne = manifold.TSNE(n_components=2, init='random', random_state=0) 122 | print(np_imgs.shape) 123 | result = tsne.fit_transform(np_imgs) 124 | 125 | tsne_visual.plot_embedding(X=result, _output=_output, zoom=_zoom) -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .deep.feature_extractor import Extractor 4 | from .sort.nn_matching import NearestNeighborDistanceMetric 5 | from .sort.preprocessing import non_max_suppression 6 | from .sort.detection import Detection 7 | from .sort.tracker import Tracker 8 | 9 | __all__ = ['DeepSort'] 10 | 11 | 12 | class DeepSort(object): 13 | def __init__(self, model_path, max_dist=0.2): 14 | self.min_confidence = 0.3 15 | # yolov3中检测结果置信度阈值,筛选置信度小于0.3的detection。 16 | 17 | self.nms_max_overlap = 1.0 18 | # 非极大抑制阈值,设置为1代表不进行抑制 19 | 20 | # 用于提取图片的embedding,返回的是一个batch图片对应的特征 21 | self.extractor = Extractor("mobilenetv2_x1_0", 22 | model_path, 23 | use_cuda=True) 24 | 25 | max_cosine_distance = max_dist 26 | # 用在级联匹配的地方,如果大于改阈值,就直接忽略 27 | nn_budget = 100 28 | # 预算,每个类别最多的样本个数,如果超过,删除旧的 29 | 30 | # 第一个参数可选'cosine' or 'euclidean' 31 | metric = NearestNeighborDistanceMetric("cosine", 32 | max_cosine_distance, 33 | nn_budget) 34 | self.tracker = Tracker(metric) 35 | 36 | def update(self, bbox_xywh, 
confidences, ori_img): 37 | self.height, self.width = ori_img.shape[:2] 38 | # generate detections 39 | features = self._get_features(bbox_xywh, ori_img) 40 | # 从原图中crop bbox对应图片并计算得到embedding 41 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 42 | 43 | detections = [ 44 | Detection(bbox_tlwh[i], conf, features[i]) 45 | for i, conf in enumerate(confidences) if conf > self.min_confidence 46 | ] # 筛选小于min_confidence的目标,并构造一个Detection对象构成的列表 47 | # Detection是一个存储图中一个bbox结果 48 | # 需要:1. bbox(tlwh形式) 2. 对应置信度 3. 对应embedding 49 | 50 | # run on non-maximum supression 51 | boxes = np.array([d.tlwh for d in detections]) 52 | scores = np.array([d.confidence for d in detections]) 53 | 54 | # 使用非极大抑制 55 | # 默认nms_thres=1的时候开启也没有用,实际上并没有进行非极大抑制 56 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 57 | detections = [detections[i] for i in indices] 58 | 59 | # update tracker 60 | # tracker给出一个预测结果,然后将detection传入,进行卡尔曼滤波操作 61 | self.tracker.predict() 62 | self.tracker.update(detections) 63 | 64 | # output bbox identities 65 | # 存储结果以及可视化 66 | outputs = [] 67 | for track in self.tracker.tracks: 68 | if not track.is_confirmed() or track.time_since_update > 1: 69 | continue 70 | box = track.to_tlwh() 71 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 72 | track_id = track.track_id 73 | outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 74 | 75 | if len(outputs) > 0: 76 | outputs = np.stack(outputs, axis=0) 77 | return np.array(outputs) 78 | 79 | """ 80 | TODO: 81 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 82 | Thanks JieChen91@github.com for reporting this bug! 83 | """ 84 | 85 | @staticmethod 86 | def _xywh_to_tlwh(bbox_xywh): 87 | bbox_xywh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 88 | bbox_xywh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 89 | return bbox_xywh 90 | 91 | def _xywh_to_xyxy(self, bbox_xywh): 92 | x, y, w, h = bbox_xywh 93 | x1 = max(int(x - w / 2), 0) 94 | x2 = min(int(x + w / 2), self.width - 1) 95 | y1 = max(int(y - h / 2), 0) 96 | y2 = min(int(y + h / 2), self.height - 1) 97 | return x1, y1, x2, y2 98 | 99 | def _tlwh_to_xyxy(self, bbox_tlwh): 100 | """ 101 | TODO: 102 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 103 | Thanks JieChen91@github.com for reporting this bug! 104 | """ 105 | x, y, w, h = bbox_tlwh 106 | x1 = max(int(x), 0) 107 | x2 = min(int(x + w), self.width - 1) 108 | y1 = max(int(y), 0) 109 | y2 = min(int(y + h), self.height - 1) 110 | return x1, y1, x2, y2 111 | 112 | def _get_features(self, bbox_xywh, ori_img): 113 | im_crops = [] 114 | for box in bbox_xywh: 115 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 116 | im = ori_img[y1:y2, x1:x2] 117 | im_crops.append(im) 118 | if im_crops: 119 | # 在这里调用并提取embedding 120 | features = self.extractor(im_crops) 121 | else: 122 | features = np.array([]) 123 | return features 124 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 
8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import linear_assignment 4 | ''' 5 | 功能列表 6 | ''' 7 | 8 | 9 | def iou(bbox, candidates): 10 | # 计算iou 11 | """Computer intersection over union. 12 | 13 | Parameters 14 | ---------- 15 | bbox : ndarray 16 | A bounding box in format `(top left x, top left y, width, height)`. 17 | candidates : ndarray 18 | A matrix of candidate bounding boxes (one per row) in the same format 19 | as `bbox`. 20 | 21 | Returns 22 | ------- 23 | ndarray 24 | The intersection over union in [0, 1] between the `bbox` and each 25 | candidate. A higher score means a larger fraction of the `bbox` is 26 | occluded by the candidate. 27 | 28 | """ 29 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 30 | candidates_tl = candidates[:, :2] 31 | candidates_br = candidates[:, :2] + candidates[:, 2:] 32 | 33 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 34 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 35 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 36 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 37 | wh = np.maximum(0., br - tl) 38 | 39 | area_intersection = wh.prod(axis=1) 40 | area_bbox = bbox[2:].prod() 41 | area_candidates = candidates[:, 2:].prod(axis=1) 42 | return area_intersection / (area_bbox + area_candidates - area_intersection) 43 | 44 | 45 | def iou_cost(tracks, detections, track_indices=None, 46 | detection_indices=None): 47 | # 计算track和detection之间的iou距离矩阵 48 | """An intersection over union distance metric. 49 | 50 | Parameters 51 | ---------- 52 | tracks : List[deep_sort.track.Track] 53 | A list of tracks. 54 | detections : List[deep_sort.detection.Detection] 55 | A list of detections. 56 | track_indices : Optional[List[int]] 57 | A list of indices to tracks that should be matched. Defaults to 58 | all `tracks`. 59 | detection_indices : Optional[List[int]] 60 | A list of indices to detections that should be matched. Defaults 61 | to all `detections`. 
62 | 63 | Returns 64 | ------- 65 | ndarray 66 | Returns a cost matrix of shape 67 | len(track_indices), len(detection_indices) where entry (i, j) is 68 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 69 | 70 | """ 71 | if track_indices is None: 72 | track_indices = np.arange(len(tracks)) 73 | if detection_indices is None: 74 | detection_indices = np.arange(len(detections)) 75 | 76 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 77 | for row, track_idx in enumerate(track_indices): 78 | if tracks[track_idx].time_since_update > 1: 79 | cost_matrix[row, :] = linear_assignment.INFTY_COST 80 | continue 81 | 82 | bbox = tracks[track_idx].to_tlwh() 83 | candidates = np.asarray( 84 | [detections[i].tlwh for i in detection_indices]) 85 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 86 | return cost_matrix 87 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | # 计算欧氏距离 6 | def _pdist(a, b): 7 | # 用于计算成对的平方距离 8 | # a NxM 代表N个对象,每个对象有M个数值作为embedding进行比较 9 | # b LxM 代表L个对象,每个对象有M个数值作为embedding进行比较 10 | # 返回的是NxL的矩阵,比如dist[i][j]代表a[i]和b[j]之间的平方和距离 11 | # 实现见:https://blog.csdn.net/frankzd/article/details/80251042 12 | """Compute pair-wise squared distance between points in `a` and `b`. 13 | 14 | Parameters 15 | ---------- 16 | a : array_like 17 | An NxM matrix of N samples of dimensionality M. 18 | b : array_like 19 | An LxM matrix of L samples of dimensionality M. 20 | 21 | Returns 22 | ------- 23 | ndarray 24 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 25 | contains the squared distance between `a[i]` and `b[j]`. 26 | 27 | """ 28 | a, b = np.asarray(a), np.asarray(b) # 拷贝一份数据 29 | if len(a) == 0 or len(b) == 0: 30 | return np.zeros((len(a), len(b))) 31 | a2, b2 = np.square(a).sum(axis=1), np.square( 32 | b).sum(axis=1) # 求每个embedding的平方和 33 | # sum(N) + sum(L) -2 x [NxM]x[MxL] = [NxL] 34 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 35 | r2 = np.clip(r2, 0., float(np.inf)) 36 | return r2 37 | 38 | 39 | def _cosine_distance(a, b, data_is_normalized=False): 40 | # a和b之间的余弦距离 41 | # a : [NxM] b : [LxM] 42 | # 余弦距离 = 1 - 余弦相似度 43 | # https://blog.csdn.net/u013749540/article/details/51813922 44 | """Compute pair-wise cosine distance between points in `a` and `b`. 45 | 46 | Parameters 47 | ---------- 48 | a : array_like 49 | An NxM matrix of N samples of dimensionality M. 50 | b : array_like 51 | An LxM matrix of L samples of dimensionality M. 52 | data_is_normalized : Optional[bool] 53 | If True, assumes rows in a and b are unit length vectors. 54 | Otherwise, a and b are explicitly normalized to lenght 1. 55 | 56 | Returns 57 | ------- 58 | ndarray 59 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 60 | contains the squared distance between `a[i]` and `b[j]`. 61 | 62 | """ 63 | if not data_is_normalized: 64 | # 需要将余弦相似度转化成类似欧氏距离的余弦距离。 65 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 66 | # np.linalg.norm 操作是求向量的范式,默认是L2范式,等同于求向量的欧式距离。 67 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 68 | return 1. - np.dot(a, b.T) 69 | 70 | 71 | def _nn_euclidean_distance(x, y): 72 | # 最近邻欧氏距离 73 | """ Helper function for nearest neighbor distance metric (Euclidean). 74 | 75 | Parameters 76 | ---------- 77 | x : ndarray 78 | A matrix of N row-vectors (sample points). 
79 | y : ndarray 80 | A matrix of M row-vectors (query points). 81 | 82 | Returns 83 | ------- 84 | ndarray 85 | A vector of length M that contains for each entry in `y` the 86 | smallest Euclidean distance to a sample in `x`. 87 | 88 | """ 89 | distances = _pdist(x, y) 90 | return np.maximum(0.0, distances.min(axis=0)) # 找到最小值 91 | 92 | 93 | def _nn_cosine_distance(x, y): 94 | # 最近邻余弦距离 95 | """ Helper function for nearest neighbor distance metric (cosine). 96 | 97 | Parameters 98 | ---------- 99 | x : ndarray 100 | A matrix of N row-vectors (sample points). 101 | y : ndarray 102 | A matrix of M row-vectors (query points). 103 | 104 | Returns 105 | ------- 106 | ndarray 107 | A vector of length M that contains for each entry in `y` the 108 | smallest cosine distance to a sample in `x`. 109 | 110 | """ 111 | distances = _cosine_distance(x, y) 112 | return distances.min(axis=0) 113 | 114 | 115 | class NearestNeighborDistanceMetric(object): 116 | # 对于每个目标,返回一个最近的距离 117 | """ 118 | A nearest neighbor distance metric that, for each target, returns 119 | the closest distance to any sample that has been observed so far. 120 | 121 | Parameters 122 | ---------- 123 | metric : str 124 | Either "euclidean" or "cosine". 125 | matching_threshold: float 126 | The matching threshold. Samples with larger distance are considered an 127 | invalid match. 128 | budget : Optional[int] 129 | If not None, fix samples per class to at most this number. Removes 130 | the oldest samples when the budget is reached. 131 | 132 | Attributes 133 | ---------- 134 | samples : Dict[int -> List[ndarray]] 135 | A dictionary that maps from target identities to the list of samples 136 | that have been observed so far. 137 | 138 | """ 139 | 140 | def __init__(self, metric, matching_threshold, budget=None): 141 | # 默认matching_threshold = 0.2 budge = 100 142 | if metric == "euclidean": 143 | # 使用最近邻欧氏距离 144 | self._metric = _nn_euclidean_distance 145 | elif metric == "cosine": 146 | # 使用最近邻余弦距离 147 | self._metric = _nn_cosine_distance 148 | else: 149 | raise ValueError( 150 | "Invalid metric; must be either 'euclidean' or 'cosine'") 151 | 152 | self.matching_threshold = matching_threshold 153 | # matching_threshold是在级联匹配的函数中调用 154 | self.budget = budget 155 | # budge 预算,控制feature的多少 156 | 157 | self.samples = {} 158 | # samples是一个字典{id->feature list} 159 | 160 | def partial_fit(self, features, targets, active_targets): 161 | # 作用:部分拟合,用新的数据更新测量距离 162 | # 调用:在特征集更新模块部分调用,tracker.update()中 163 | """Update the distance metric with new data. 164 | 165 | Parameters 166 | ---------- 167 | features : ndarray 168 | An NxM matrix of N features of dimensionality M. 169 | targets : ndarray 170 | An integer array of associated target identities. 171 | active_targets : List[int] 172 | A list of targets that are currently present in the scene. 173 | """ 174 | for feature, target in zip(features, targets): 175 | self.samples.setdefault(target, []).append(feature) 176 | # 对应目标下添加新的feature,更新feature集合 177 | # 目标id : feature list 178 | if self.budget is not None: 179 | self.samples[target] = self.samples[target][-self.budget:] 180 | # 设置预算,每个类最多多少个目标,超过直接忽略 181 | 182 | # 筛选激活的目标 183 | self.samples = {k: self.samples[k] for k in active_targets} 184 | 185 | def distance(self, features, targets): 186 | # 作用:比较feature和targets之间的距离,返回一个代价矩阵 187 | # 调用:在匹配阶段,将distance封装为gated_metric, 188 | # 进行外观信息(reid得到的深度特征)+ 189 | # 运动信息(马氏距离用于度量两个分布相似程度) 190 | """Compute distance between features and targets. 
191 | 192 | Parameters 193 | ---------- 194 | features : ndarray 195 | An NxM matrix of N features of dimensionality M. 196 | targets : List[int] 197 | A list of targets to match the given `features` against. 198 | 199 | Returns 200 | ------- 201 | ndarray 202 | Returns a cost matrix of shape len(targets), len(features), where 203 | element (i, j) contains the closest squared distance between 204 | `targets[i]` and `features[j]`. 205 | 206 | """ 207 | cost_matrix = np.zeros((len(targets), len(features))) 208 | for i, target in enumerate(targets): 209 | cost_matrix[i, :] = self._metric(self.samples[target], features) 210 | return cost_matrix 211 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 
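    In this implementation a track starts out `Tentative`, is promoted to
    `Confirmed` once it has been matched for `n_init` consecutive frames, and is
    marked `Deleted` either on a miss while still tentative or once it has gone
    more than `max_age` frames without a matched detection.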
11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | # 一个轨迹的信息,包含(x,y,a,h) & v 21 | """ 22 | A single target track with state space `(x, y, a, h)` and associated 23 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 24 | aspect ratio and `h` is the height. 25 | 26 | Parameters 27 | ---------- 28 | mean : ndarray 29 | Mean vector of the initial state distribution. 30 | covariance : ndarray 31 | Covariance matrix of the initial state distribution. 32 | track_id : int 33 | A unique track identifier. 34 | n_init : int 35 | Number of consecutive detections before the track is confirmed. The 36 | track state is set to `Deleted` if a miss occurs within the first 37 | `n_init` frames. 38 | max_age : int 39 | The maximum number of consecutive misses before the track state is 40 | set to `Deleted`. 41 | feature : Optional[ndarray] 42 | Feature vector of the detection this track originates from. If not None, 43 | this feature is added to the `features` cache. 44 | 45 | Attributes 46 | ---------- 47 | mean : ndarray 48 | Mean vector of the initial state distribution. 49 | covariance : ndarray 50 | Covariance matrix of the initial state distribution. 51 | track_id : int 52 | A unique track identifier. 53 | hits : int 54 | Total number of measurement updates. 55 | age : int 56 | Total number of frames since first occurance. 57 | time_since_update : int 58 | Total number of frames since last measurement update. 59 | state : TrackState 60 | The current track state. 61 | features : List[ndarray] 62 | A cache of features. On each measurement update, the associated feature 63 | vector is added to this list. 64 | 65 | """ 66 | 67 | def __init__(self, mean, covariance, track_id, n_init, max_age, 68 | feature=None): 69 | # max age是一个存活期限,默认为70帧 70 | self.mean = mean 71 | self.covariance = covariance 72 | self.track_id = track_id 73 | self.hits = 1 74 | # hits和n_init进行比较 75 | # hits每次update的时候进行一次更新(只有match的时候才进行update) 76 | # hits代表匹配上了多少次,匹配次数超过n_init就会设置为confirmed状态 77 | self.age = 1 # 没有用到,和time_since_update功能重复 78 | self.time_since_update = 0 79 | # 每次调用predict函数的时候就会+1 80 | # 每次调用update函数的时候就会设置为0 81 | 82 | self.state = TrackState.Tentative 83 | self.features = [] 84 | # 每个track对应多个features, 每次更新都将最新的feature添加到列表中 85 | if feature is not None: 86 | self.features.append(feature) 87 | 88 | self._n_init = n_init # 如果连续n_init帧都没有出现匹配,设置为deleted状态 89 | self._max_age = max_age # 上限 90 | 91 | def to_tlwh(self): 92 | """Get current position in bounding box format `(top left x, top left y, 93 | width, height)`. 94 | 95 | Returns 96 | ------- 97 | ndarray 98 | The bounding box. 99 | 100 | """ 101 | ret = self.mean[:4].copy() 102 | ret[2] *= ret[3] 103 | ret[:2] -= ret[2:] / 2 104 | return ret 105 | 106 | def to_tlbr(self): 107 | """Get current position in bounding box format `(min x, miny, max x, 108 | max y)`. 109 | 110 | Returns 111 | ------- 112 | ndarray 113 | The bounding box. 114 | 115 | """ 116 | ret = self.to_tlwh() 117 | ret[2:] = ret[:2] + ret[2:] 118 | return ret 119 | 120 | def predict(self, kf): 121 | # 预测结果 122 | """Propagate the state distribution to the current time step using a 123 | Kalman filter prediction step. 124 | 125 | Parameters 126 | ---------- 127 | kf : kalman_filter.KalmanFilter 128 | The Kalman filter. 
129 | 130 | """ 131 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 132 | self.age += 1 133 | self.time_since_update += 1 134 | 135 | def update(self, kf, detection): 136 | # 将预测结果和观测结果结合 137 | """Perform Kalman filter measurement update step and update the feature 138 | cache. 139 | 140 | Parameters 141 | ---------- 142 | kf : kalman_filter.KalmanFilter 143 | The Kalman filter. 144 | detection : Detection 145 | The associated detection. 146 | """ 147 | self.mean, self.covariance = kf.update( 148 | self.mean, self.covariance, detection.to_xyah()) 149 | self.features.append(detection.feature) 150 | 151 | self.hits += 1 152 | self.time_since_update = 0 153 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 154 | self.state = TrackState.Confirmed 155 | 156 | def mark_missed(self): 157 | # 非常重要 158 | """Mark this track as missed (no association at the current time step). 159 | """ 160 | if self.state == TrackState.Tentative: 161 | self.state = TrackState.Deleted 162 | elif self.time_since_update > self._max_age: 163 | self.state = TrackState.Deleted 164 | 165 | def is_tentative(self): 166 | """Returns True if this track is tentative (unconfirmed). 167 | """ 168 | return self.state == TrackState.Tentative 169 | 170 | def is_confirmed(self): 171 | """Returns True if this track is confirmed.""" 172 | return self.state == TrackState.Confirmed 173 | 174 | def is_deleted(self): 175 | """Returns True if this track is dead and should be deleted.""" 176 | return self.state == TrackState.Deleted 177 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=False, opset_version=10) 47 | 48 | # Validate exported model 49 | import onnx 50 | model = 
onnx.load('weights/export.onnx') # Load the ONNX model 51 | onnx.checker.check_model(model) # Check that the IR is well formed 52 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 53 | return 54 | 55 | # Half precision 56 | half = half and device.type != 'cpu' # half precision only supported on CUDA 57 | if half: 58 | model.half() 59 | 60 | # Set Dataloader 61 | vid_path, vid_writer = None, None 62 | if webcam: 63 | view_img = True 64 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 65 | dataset = LoadStreams(source, img_size=img_size, half=half) 66 | else: 67 | save_img = True 68 | dataset = LoadImages(source, img_size=img_size, half=half) 69 | 70 | # Get names and colors 71 | names = load_classes(opt.names) 72 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 73 | 74 | # Run inference 75 | t0 = time.time() 76 | for path, img, im0s, vid_cap in dataset: 77 | t = time.time() 78 | 79 | # Get detections 80 | img = torch.from_numpy(img).to(device) 81 | if img.ndimension() == 3: 82 | img = img.unsqueeze(0) 83 | pred = model(img)[0] 84 | 85 | if opt.half: 86 | pred = pred.float() 87 | 88 | # Apply NMS 89 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres) 90 | 91 | # Apply 92 | if classify: 93 | pred = apply_classifier(pred, modelc, img, im0s) 94 | 95 | # Process detections 96 | for i, det in enumerate(pred): # detections per image 97 | if webcam: # batch_size >= 1 98 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 99 | else: 100 | p, s, im0 = path, '', im0s 101 | 102 | save_path = str(Path(out) / Path(p).name) 103 | s += '%gx%g ' % img.shape[2:] # print string 104 | if det is not None and len(det): 105 | # Rescale boxes from img_size to im0 size 106 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 107 | 108 | # Print results 109 | for c in det[:, -1].unique(): 110 | n = (det[:, -1] == c).sum() # detections per class 111 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 112 | 113 | # Write results 114 | for *xyxy, conf, _, cls in det: 115 | if save_txt: # Write to file 116 | with open(save_path + '.txt', 'a') as file: 117 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 118 | 119 | if save_img or view_img: # Add bbox to image 120 | label = '%s %.2f' % (names[int(cls)], conf) 121 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 122 | 123 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 124 | 125 | # Stream results 126 | if view_img: 127 | cv2.imshow(p, im0) 128 | if cv2.waitKey(1) == ord('q'): # q to quit 129 | raise StopIteration 130 | 131 | # Save results (image with detections) 132 | if save_img: 133 | if dataset.mode == 'images': 134 | cv2.imwrite(save_path, im0) 135 | else: 136 | if vid_path != save_path: # new video 137 | vid_path = save_path 138 | if isinstance(vid_writer, cv2.VideoWriter): 139 | vid_writer.release() # release previous video writer 140 | 141 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 142 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 143 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 144 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 145 | vid_writer.write(im0) 146 | 147 | if save_txt or save_img: 148 | print('Results saved to %s' % os.getcwd() + os.sep + out) 149 | if platform == 'darwin': # MacOS 150 | os.system('open ' + out + ' ' + save_path) 151 | 152 | print('Done. 
(%.3fs)' % (time.time() - t0)) 153 | 154 | 155 | if __name__ == '__main__': 156 | parser = argparse.ArgumentParser() 157 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-1cls.cfg', help='*.cfg path') 158 | parser.add_argument('--names', type=str, default='data/voc_small.names', help='*.names path') 159 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 160 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 161 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 162 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 163 | parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold') 164 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 165 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 166 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 167 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 168 | parser.add_argument('--view-img', action='store_true', help='display results') 169 | opt = parser.parse_args() 170 | print(opt) 171 | 172 | with torch.no_grad(): 173 | detect() 174 | -------------------------------------------------------------------------------- /eval_mot.py: -------------------------------------------------------------------------------- 1 | # py-motmetrics - Metrics for multiple object tracker (MOT) benchmarking. 2 | # https://github.com/cheind/py-motmetrics/ 3 | # 4 | # MIT License 5 | # Copyright (c) 2017-2020 Christoph Heindl, Jack Valmadre and others. 6 | # See LICENSE file for terms. 7 | """Compute metrics for trackers using MOTChallenge ground-truth data.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import argparse 14 | from collections import OrderedDict 15 | import glob 16 | import logging 17 | import os 18 | from pathlib import Path 19 | 20 | import motmetrics as mm 21 | 22 | 23 | def parse_args(): 24 | """Defines and parses command-line arguments.""" 25 | parser = argparse.ArgumentParser( 26 | description=""" 27 | Compute metrics for trackers using MOTChallenge ground-truth data. 28 | Files 29 | ----- 30 | All file content, ground truth and test files, have to comply with the 31 | format described in 32 | Milan, Anton, et al. 33 | "Mot16: A benchmark for multi-object tracking." 34 | arXiv preprint arXiv:1603.00831 (2016). 35 | https://motchallenge.net/ 36 | Structure 37 | --------- 38 | Layout for ground truth data 39 | //gt/gt.txt 40 | //gt/gt.txt 41 | ... 42 | Layout for test data 43 | /.txt 44 | /.txt 45 | ... 
46 | Sequences of ground truth and test will be matched according to the `` 47 | string.""", 48 | formatter_class=argparse.RawTextHelpFormatter) 49 | 50 | parser.add_argument('--groundtruths', 51 | type=str, 52 | default="./data/videosample", 53 | help='Directory containing ground truth files.') 54 | parser.add_argument('--tests', 55 | type=str, 56 | default="./data/videoresult", 57 | help='Directory containing tracker result files') 58 | parser.add_argument('--loglevel', 59 | type=str, 60 | help='Log level', 61 | default='info') 62 | parser.add_argument('--fmt', type=str, help='Data format', default='mot16') 63 | parser.add_argument('--solver', 64 | type=str, 65 | help='LAP solver to use for matching between frames.') 66 | parser.add_argument( 67 | '--id_solver', 68 | type=str, 69 | help='LAP solver to use for ID metrics. Defaults to --solver.') 70 | parser.add_argument('--exclude_id', 71 | dest='exclude_id', 72 | default=False, 73 | action='store_true', 74 | help='Disable ID metrics') 75 | return parser.parse_args() 76 | 77 | 78 | def compare_dataframes(gts, ts): 79 | """Builds accumulator for each sequence.""" 80 | accs = [] 81 | names = [] 82 | for k, tsacc in ts.items(): 83 | # print(k) 84 | # print(gts) 85 | if k in gts: 86 | logging.info('Comparing %s...', k) 87 | accs.append( 88 | mm.utils.compare_to_groundtruth(gts[k], 89 | tsacc, 90 | 'iou', 91 | distth=0.5)) 92 | names.append(k) 93 | else: 94 | logging.warning('No ground truth for %s, skipping.', k) 95 | 96 | return accs, names 97 | 98 | 99 | if __name__ == '__main__': 100 | # pylint: disable=missing-function-docstring 101 | args = parse_args() 102 | 103 | loglevel = getattr(logging, args.loglevel.upper(), None) 104 | if not isinstance(loglevel, int): 105 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 106 | logging.basicConfig(level=loglevel, 107 | format='%(asctime)s %(levelname)s - %(message)s', 108 | datefmt='%I:%M:%S') 109 | 110 | if args.solver: 111 | mm.lap.default_solver = args.solver 112 | 113 | gtfiles = glob.glob(os.path.join(args.groundtruths, 'cutout*/gt.txt')) 114 | tsfiles = [ 115 | f for f in glob.glob(os.path.join(args.tests, '*.txt')) 116 | if not os.path.basename(f).startswith('eval') 117 | ] 118 | 119 | # print(gtfiles,'\n\n', tsfiles) 120 | 121 | logging.info('Found %d groundtruths and %d test files.', len(gtfiles), 122 | len(tsfiles)) 123 | logging.info('Available LAP solvers %s', str(mm.lap.available_solvers)) 124 | logging.info('Default LAP solver \'%s\'', mm.lap.default_solver) 125 | logging.info('Loading files.') 126 | 127 | gt = OrderedDict([(Path(f).parts[-2], 128 | mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) 129 | for f in gtfiles]) 130 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], 131 | mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 132 | 133 | mh = mm.metrics.create() 134 | accs, names = compare_dataframes(gt, ts) 135 | 136 | metrics = list(mm.metrics.motchallenge_metrics) 137 | if args.exclude_id: 138 | metrics = [x for x in metrics if not x.startswith('id')] 139 | 140 | logging.info('Running metrics') 141 | 142 | if args.id_solver: 143 | mm.lap.default_solver = args.id_solver 144 | summary = mh.compute_many(accs, 145 | names=names, 146 | metrics=metrics, 147 | generate_overall=True) 148 | print( 149 | mm.io.render_summary(summary, 150 | formatters=mh.formatters, 151 | namemap=mm.io.motchallenge_metric_names)) 152 | logging.info('Completed') 153 | -------------------------------------------------------------------------------- /miniversion/cow.names: 
-------------------------------------------------------------------------------- 1 | cow 2 | -------------------------------------------------------------------------------- /miniversion/predict.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | ''' 3 | @File : predict.py 4 | @Time : 2019/12/29 16:33:04 5 | @Author : pprp 6 | @Contact : 1115957667@qq.com 7 | @License : (C)Copyright 2018-2019 8 | @Desc : None 9 | ''' 10 | 11 | # here put the import lib 12 | import torch 13 | import time 14 | import cv2 15 | import numpy as np 16 | import os 17 | from PIL import Image 18 | 19 | from models import * 20 | from utils.datasets import * 21 | from utils.utils import * 22 | 23 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 24 | 25 | 26 | class InferYOLOv3(object): 27 | def __init__(self, 28 | cfg, 29 | img_size, 30 | weight_path, 31 | data_cfg, 32 | device, 33 | conf_thres=0.5, 34 | nms_thres=0.5): 35 | self.cfg = cfg 36 | self.img_size = img_size 37 | self.weight_path = weight_path 38 | # self.img_file = img_file 39 | self.device = device 40 | self.model = Darknet(cfg).to(device) 41 | self.model.load_state_dict( 42 | torch.load(weight_path, map_location=device)['model']) 43 | self.model.to(device).eval() 44 | self.classes = load_classes(parse_data_cfg(data_cfg)['names']) 45 | self.colors = [random.randint(0, 255) for _ in range(3)] 46 | self.conf_thres = conf_thres 47 | self.nms_thres = nms_thres 48 | 49 | def predict(self, im0): 50 | # singleDataloader = LoadSingleImages(img_file, img_size=img_size) 51 | # path, img, im0 = singleDataloader.__next__() 52 | 53 | img, _, _ = letterbox(im0, new_shape=self.img_size) 54 | 55 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 56 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 57 | img /= 255.0 58 | 59 | # TODO: how to get img and im0 60 | 61 | img = torch.from_numpy(img).unsqueeze(0).to(self.device) 62 | pred, _ = self.model(img) 63 | det = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] 64 | 65 | if det is not None and len(det) > 0: 66 | # Rescale boxes from 416 to true image size 67 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], 68 | im0.shape).round() 69 | 70 | # Print results to screen 71 | print('%gx%g ' % img.shape[2:], end='') # print image size 72 | for c in det[:, -1].unique(): 73 | n = (det[:, -1] == c).sum() 74 | print('%g %ss' % (n, self.classes[int(c)]), end=', ') 75 | 76 | img = np.array(img.cpu()) 77 | # Draw bounding boxes and labels of detections 78 | 79 | bboxes, confs, cls_confs, cls_ids = [], [], [], [] 80 | 81 | for *xyxy, conf, cls_conf, cls_id in det: 82 | # label = '%s %.2f' % (classes[int(cls_id)], conf) 83 | bboxes.append(xyxy) 84 | confs.append(conf) 85 | cls_confs.append(cls_conf) 86 | cls_ids.append(cls_id) 87 | # plot_one_box(xyxy, im0, label=label, color=colors) 88 | return np.array(bboxes), np.array(cls_confs), np.array(cls_ids) 89 | else: 90 | return None, None, None 91 | 92 | def plot_bbox(self, ori_img, boxes): 93 | img = ori_img 94 | height, width = img.shape[:2] 95 | for box in boxes: 96 | # get x1 x2 x3 x4 97 | x1 = int(round(((box[0] - box[2] / 2.0) * width).item())) 98 | y1 = int(round(((box[1] - box[3] / 2.0) * height).item())) 99 | x2 = int(round(((box[0] + box[2] / 2.0) * width).item())) 100 | y2 = int(round(((box[1] + box[3] / 2.0) * height).item())) 101 | cls_conf = box[5] 102 | cls_id = box[6] 103 | # import random 104 | # color = random.choices(range(256),k=3) 105 | color = [int(x) for x in 
np.random.randint(256, size=3)] 106 | # put texts and rectangles 107 | img = cv2.putText(img, self.class_names[cls_id], (x1, y1), 108 | cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) 109 | img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) 110 | return img 111 | 112 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 113 | # Plots one bounding box on image img 114 | tl = line_thickness or round( 115 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 116 | color = color or [random.randint(0, 255) for _ in range(3)] 117 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 118 | cv2.rectangle(img, c1, c2, color, thickness=tl) 119 | if label: 120 | tf = max(tl - 1, 1) # font thickness 121 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 122 | thickness=tf)[0] 123 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 124 | cv2.rectangle(img, c1, c2, color, -1) # filled 125 | cv2.putText(img, 126 | label, (c1[0], c1[1] - 2), 127 | 0, 128 | tl / 3, [225, 255, 255], 129 | thickness=tf, 130 | lineType=cv2.LINE_AA) 131 | 132 | 133 | if __name__ == "__main__": 134 | ################################################# 135 | cfg = './yolov3-cbam.cfg' 136 | img_size = 416 137 | weight_path = './miniversion/best.pt' 138 | img_file = "./miniversion/test.jpg" #"./images/train2014/0137-2112.jpg" 139 | data_cfg = "./miniversion/dataset1.data" 140 | conf_thres = 0.5 141 | nms_thres = 0.5 142 | device = torch_utils.select_device() 143 | ################################################# 144 | yolo = InferYOLOv3(cfg, img_size, weight_path, data_cfg, device) 145 | # bbox_xcycwh, cls_conf, cls_ids = yolo(img_file) 146 | # print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 147 | 148 | img = cv2.imread(img_file) 149 | print(img.shape) 150 | # im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 151 | im = img 152 | print(im.shape) 153 | bbox_xcycwh, cls_conf, cls_ids = yolo.predict(im) 154 | print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 155 | 156 | bboxs = [] 157 | for i in range(len(bbox_xcycwh)): 158 | bboxs.append(tuple(int(bbox_xcycwh[i][j].tolist()) for j in range(4))) 159 | 160 | print(bboxs) 161 | 162 | -------------------------------------------------------------------------------- /miniversion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/miniversion/utils/__init__.py -------------------------------------------------------------------------------- /miniversion/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | r = sum([os.system(x) for x in s]) # run commands, get return zeros 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | r = os.system(s) 32 | 33 | # Error check 34 | if r != 0: 35 | os.system('rm ' + name) # remove partial downloads 36 | print('ERROR: Download failure ') 37 | return r 38 | 39 | # Unzip if archive 40 | if name.endswith('.zip'): 41 | print('unzipping... ', end='') 42 | os.system('unzip -q %s' % name) # unzip 43 | os.remove(name) # remove zip to free space 44 | 45 | print('Done (%.1fs)' % (time.time() - t)) 46 | return r 47 | 48 | 49 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 50 | # Uploads a file to a bucket 51 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 52 | 53 | storage_client = storage.Client() 54 | bucket = storage_client.get_bucket(bucket_name) 55 | blob = bucket.blob(destination_blob_name) 56 | 57 | blob.upload_from_filename(source_file_name) 58 | 59 | print('File {} uploaded to {}.'.format( 60 | source_file_name, 61 | destination_blob_name)) 62 | 63 | 64 | def download_blob(bucket_name, source_blob_name, destination_file_name): 65 | # Uploads a blob from a bucket 66 | storage_client = storage.Client() 67 | bucket = storage_client.get_bucket(bucket_name) 68 | blob = bucket.blob(source_blob_name) 69 | 70 | blob.download_to_filename(destination_file_name) 71 | 72 | print('Blob {} downloaded to {}.'.format( 73 | source_blob_name, 74 | destination_file_name)) 75 | -------------------------------------------------------------------------------- /miniversion/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted 11 | path = 'cfg' + os.sep + path 12 | 13 | with open(path, 'r') as f: 14 | lines = f.read().split('\n') 15 | lines = [x for x in lines if x and not x.startswith('#')] 16 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 17 | mdefs = [] # module definitions 18 | for line in lines: 19 | if line.startswith('['): # This marks the start of a new block 20 | mdefs.append({}) 21 | mdefs[-1]['type'] = line[1:-1].rstrip() 22 | if mdefs[-1]['type'] == 'convolutional': 23 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 24 | else: 25 | key, val = line.split("=") 26 | key = key.rstrip() 27 | 28 | if 'anchors' in key: 29 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 30 | else: 31 | mdefs[-1][key] = val.strip() 32 | 33 | # Check all fields are supported 34 | supported = ['type', 'batch_normalize', 'filters', 'size', 
'stride', 'pad', 'activation', 'layers', 'groups', 35 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 36 | 'stride_x', 'stride_y'] 37 | 38 | f = [] # fields 39 | for x in mdefs[1:]: 40 | [f.append(k) for k in x if k not in f] 41 | u = [x for x in f if x not in supported] # unsupported fields 42 | assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 43 | 44 | return mdefs 45 | 46 | 47 | def parse_data_cfg(path): 48 | # Parses the data configuration file 49 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted 50 | path = 'data' + os.sep + path 51 | 52 | with open(path, 'r') as f: 53 | lines = f.readlines() 54 | 55 | options = dict() 56 | for line in lines: 57 | line = line.strip() 58 | if line == '' or line.startswith('#'): 59 | continue 60 | key, val = line.split('=') 61 | options[key.strip()] = val.strip() 62 | 63 | return options 64 | -------------------------------------------------------------------------------- /miniversion/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False, batch_size=None): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 29 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 31 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 32 | for i in range(0, ng): 33 | if i == 1: 34 | s = ' ' * len(s) 35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 36 | (s, i, x[i].name, x[i].total_memory / c)) 37 | else: 38 | print('Using CPU') 39 | 40 | print('') # skip a line 41 | return torch.device('cuda:0' if cuda else 'cpu') 42 | 43 | 44 | def fuse_conv_and_bn(conv, bn): 45 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 46 | with torch.no_grad(): 47 | # init 48 | fusedconv = torch.nn.Conv2d(conv.in_channels, 49 | conv.out_channels, 50 | kernel_size=conv.kernel_size, 51 | stride=conv.stride, 52 | padding=conv.padding, 53 | bias=True) 54 | 55 | # prepare filters 56 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 57 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 58 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 59 | 60 | # prepare spatial bias 61 | if conv.bias is not None: 62 | b_conv = conv.bias 63 | else: 64 | b_conv = 
torch.zeros(conv.weight.size(0)) 65 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 66 | fusedconv.bias.copy_(b_conv + b_bn) 67 | 68 | return fusedconv 69 | 70 | 71 | def model_info(model, report='summary'): 72 | # Plots a line-by-line description of a PyTorch model 73 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 74 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 75 | if report is 'full': 76 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 77 | for i, (name, p) in enumerate(model.named_parameters()): 78 | name = name.replace('module_list.', '') 79 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 80 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 81 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 82 | 83 | 84 | def load_classifier(name='resnet101', n=2): 85 | # Loads a pretrained model reshaped to n-class output 86 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 87 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 88 | 89 | # Display model properties 90 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 91 | print(x + ' =', eval(x)) 92 | 93 | # Reshape output to n classes 94 | filters = model.last_linear.weight.shape[1] 95 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 96 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 97 | model.last_linear.out_features = n 98 | return model 99 | 100 | 101 | from collections import defaultdict 102 | from torch.optim import Optimizer 103 | 104 | 105 | class Lookahead(Optimizer): 106 | def __init__(self, optimizer, k=5, alpha=0.5): 107 | self.optimizer = optimizer 108 | self.k = k 109 | self.alpha = alpha 110 | self.param_groups = self.optimizer.param_groups 111 | self.state = defaultdict(dict) 112 | self.fast_state = self.optimizer.state 113 | for group in self.param_groups: 114 | group["counter"] = 0 115 | 116 | def update(self, group): 117 | for fast in group["params"]: 118 | param_state = self.state[fast] 119 | if "slow_param" not in param_state: 120 | param_state["slow_param"] = torch.zeros_like(fast.data) 121 | param_state["slow_param"].copy_(fast.data) 122 | slow = param_state["slow_param"] 123 | slow += (fast.data - slow) * self.alpha 124 | fast.data.copy_(slow) 125 | 126 | def update_lookahead(self): 127 | for group in self.param_groups: 128 | self.update(group) 129 | 130 | def step(self, closure=None): 131 | loss = self.optimizer.step(closure) 132 | for group in self.param_groups: 133 | if group["counter"] == 0: 134 | self.update(group) 135 | group["counter"] += 1 136 | if group["counter"] >= self.k: 137 | group["counter"] = 0 138 | return loss 139 | 140 | def state_dict(self): 141 | fast_state_dict = self.optimizer.state_dict() 142 | slow_state = { 143 | (id(k) if isinstance(k, torch.Tensor) else k): v 144 | for k, v in self.state.items() 145 | } 146 | fast_state = fast_state_dict["state"] 147 | param_groups = fast_state_dict["param_groups"] 148 | return { 149 | "fast_state": fast_state, 150 | "slow_state": slow_state, 151 | "param_groups": param_groups, 152 | } 153 | 154 | def load_state_dict(self, state_dict): 155 | slow_state_dict = { 156 | "state": state_dict["slow_state"], 157 | "param_groups": 
state_dict["param_groups"], 158 | } 159 | fast_state_dict = { 160 | "state": state_dict["fast_state"], 161 | "param_groups": state_dict["param_groups"], 162 | } 163 | super(Lookahead, self).load_state_dict(slow_state_dict) 164 | self.optimizer.load_state_dict(fast_state_dict) 165 | self.fast_state = self.optimizer.state 166 | 167 | def add_param_group(self, param_group): 168 | param_group["counter"] = 0 169 | self.optimizer.add_param_group(param_group) 170 | -------------------------------------------------------------------------------- /miniversion/utils/utils_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 | (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | # def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | # ''' 19 | # draw box of an id 20 | # ''' 21 | # x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # # set color and label text 23 | # color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | # label = '{} {}'.format(cls_name, identity) 25 | # # box text and bar 26 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | # return img 31 | 32 | 33 | def plot_one_box(x, ori_img, color=None, label=None, line_thickness=None): 34 | # Plots one bounding box on image img 35 | img = ori_img 36 | tl = line_thickness or round( 37 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 38 | color = color or [random.randint(0, 255) for _ in range(3)] 39 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 40 | cv2.rectangle(img, c1, c2, color, thickness=tl) 41 | if label: 42 | tf = max(tl - 1, 1) # font thickness 43 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 44 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 45 | cv2.rectangle(img, c1, c2, color, -1) # filled 46 | cv2.putText(img, 47 | label, (c1[0], c1[1] - 2), 48 | 0, 49 | tl / 3, [225, 255, 255], 50 | thickness=tf, 51 | lineType=cv2.LINE_AA) 52 | return img 53 | 54 | 55 | ''' 56 | deep sort 中的画图方法,在原图上进行作画 57 | ''' 58 | def draw_bboxes(ori_img, bbox, identities=None, offset=(0,0)): 59 | img = ori_img 60 | for i,box in enumerate(bbox): 61 | x1,y1,x2,y2 = [int(i) for i in box] 62 | x1 
+= offset[0] 63 | x2 += offset[0] 64 | y1 += offset[1] 65 | y2 += offset[1] 66 | # box text and bar 67 | id = int(identities[i]) if identities is not None else 0 68 | color = COLORS_10[id%len(COLORS_10)] 69 | label = '{}{:d}'.format("", id) 70 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 71 | img = plot_one_box([x1,y1,x2,y2], img, color, label) 72 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 73 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 74 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 75 | return img 76 | 77 | 78 | 79 | 80 | 81 | def softmax(x): 82 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 83 | x_exp = np.exp(x*5) 84 | return x_exp/x_exp.sum() 85 | 86 | def softmin(x): 87 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 88 | x_exp = np.exp(-x) 89 | return x_exp/x_exp.sum() 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | x = np.arange(10)/10. 95 | x = np.array([0.5,0.5,0.5,0.6,1.]) 96 | y = softmax(x) 97 | z = softmin(x) 98 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /miniversion/utils/visdom.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import time 3 | import numpy as np 4 | 5 | 6 | class Visualizer(object): 7 | def __init__(self, env='default', **kwargs): 8 | self.vis = visdom.Visdom(env=env, **kwargs) 9 | self.index = {} 10 | 11 | def plot_many_stack(self, d): 12 | ''' 13 | self.plot('loss',1.00) 14 | ''' 15 | name = list(d.keys()) 16 | name_total = " ".join(name) 17 | x = self.index.get(name_total, 0) 18 | val = list(d.values()) 19 | if len(val) == 1: 20 | y = np.array(val) 21 | else: 22 | y = np.array(val).reshape(-1, len(val)) 23 | # print(x) 24 | self.vis.line( 25 | Y=y, 26 | X=np.ones(y.shape) * x, 27 | win=str(name_total), # unicode 28 | opts=dict(legend=name, title=name_total), 29 | update=None if x == 0 else 'append') 30 | self.index[name_total] = x + 1 -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import cv2 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from models import * 9 | from utils.datasets import * 10 | from utils.utils import * 11 | 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 13 | 14 | 15 | class InferYOLOv3(object): 16 | def __init__(self, 17 | cfg, 18 | img_size, 19 | weight_path, 20 | data_cfg, 21 | device, 22 | conf_thres=0.5, 23 | nms_thres=0.5): 24 | self.cfg = cfg 25 | self.img_size = img_size 26 | self.weight_path = weight_path 27 | # self.img_file = img_file 28 | self.device = device 29 | self.model = Darknet(cfg).to(device) 30 | self.model.load_state_dict( 31 | torch.load(weight_path, map_location=device)['model']) 32 | self.model.to(device).eval() 33 | self.classes = load_classes(parse_data_cfg(data_cfg)['names']) 34 | self.colors = [random.randint(0, 255) for _ in range(3)] 35 | self.conf_thres = conf_thres 36 | self.nms_thres = nms_thres 37 | 38 | def predict(self, im0): 39 | # singleDataloader = LoadSingleImages(img_file, img_size=img_size) 40 | # path, img, im0 = singleDataloader.__next__() 41 | 42 | img, _, _ = letterbox(im0, new_shape=self.img_size) 43 | 44 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 45 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 46 | img 
/= 255.0 47 | 48 | # TODO: how to get img and im0 49 | 50 | img = torch.from_numpy(img).unsqueeze(0).to(self.device) 51 | pred, _ = self.model(img) 52 | det = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] 53 | 54 | if det is not None and len(det) > 0: 55 | # Rescale boxes from 416 to true image size 56 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], 57 | im0.shape).round() 58 | 59 | # Print results to screen 60 | # print('%gx%g ' % img.shape[2:], end='') # print image size 61 | for c in det[:, -1].unique(): 62 | n = (det[:, -1] == c).sum() 63 | # print('%g %ss' % (n, self.classes[int(c)]), end=', ') 64 | 65 | img = np.array(img.cpu()) 66 | # Draw bounding boxes and labels of detections 67 | 68 | bboxes, confs, cls_confs, cls_ids = [], [], [], [] 69 | 70 | for *xyxy, conf, cls_conf, cls_id in det: 71 | # label = '%s %.2f' % (classes[int(cls_id)], conf) 72 | bboxes.append(xyxy) 73 | confs.append(conf) 74 | cls_confs.append(cls_conf) 75 | cls_ids.append(cls_id) 76 | # plot_one_box(xyxy, im0, label=label, color=colors) 77 | return np.array(bboxes), np.array(cls_confs), np.array(cls_ids) 78 | else: 79 | return None, None, None 80 | 81 | def plot_bbox(self, ori_img, boxes): 82 | img = ori_img 83 | height, width = img.shape[:2] 84 | for box in boxes: 85 | # get x1 x2 x3 x4 86 | x1 = int(round(((box[0] - box[2] / 2.0) * width).item())) 87 | y1 = int(round(((box[1] - box[3] / 2.0) * height).item())) 88 | x2 = int(round(((box[0] + box[2] / 2.0) * width).item())) 89 | y2 = int(round(((box[1] + box[3] / 2.0) * height).item())) 90 | cls_conf = box[5] 91 | cls_id = box[6] 92 | # import random 93 | # color = random.choices(range(256),k=3) 94 | color = [int(x) for x in np.random.randint(256, size=3)] 95 | # put texts and rectangles 96 | img = cv2.putText(img, self.class_names[cls_id], (x1, y1), 97 | cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) 98 | img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) 99 | return img 100 | 101 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 102 | # Plots one bounding box on image img 103 | tl = line_thickness or round( 104 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 105 | color = color or [random.randint(0, 255) for _ in range(3)] 106 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 107 | cv2.rectangle(img, c1, c2, color, thickness=tl) 108 | if label: 109 | tf = max(tl - 1, 1) # font thickness 110 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 111 | thickness=tf)[0] 112 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 113 | cv2.rectangle(img, c1, c2, color, -1) # filled 114 | cv2.putText(img, 115 | label, (c1[0], c1[1] - 2), 116 | 0, 117 | tl / 3, [225, 255, 255], 118 | thickness=tf, 119 | lineType=cv2.LINE_AA) 120 | 121 | 122 | if __name__ == "__main__": 123 | ################################################# 124 | cfg = './cfg/yolov3-1cls.cfg' 125 | img_size = 416 126 | weight_path = './weights/best.pt' 127 | img_file = "/home/dongpeijie/datasets/data_with_labelimg/images/train2014/0137-1162.jpg" 128 | data_cfg = "./data/voc_small.data" 129 | conf_thres = 0.5 130 | nms_thres = 0.5 131 | device = torch_utils.select_device() 132 | ################################################# 133 | yolo = InferYOLOv3(cfg, img_size, weight_path, data_cfg, device) 134 | # bbox_xcycwh, cls_conf, cls_ids = yolo(img_file) 135 | # print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 136 | 137 | img = cv2.imread(img_file) 138 | im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 139 | im = img 140 | print(im.shape) 141 | 
bbox_xcycwh, cls_conf, cls_ids = yolo.predict(im) 142 | print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 143 | -------------------------------------------------------------------------------- /sort.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import numpy as np 7 | 8 | from predict import InferYOLOv3 9 | from utils.utils import xyxy2xywh 10 | from deep_sort import DeepSort 11 | from utils.utils_sort import COLORS_10, draw_bboxes 12 | from sort.sort import * 13 | 14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 15 | 16 | 17 | class Detector(object): 18 | def __init__(self, args): 19 | self.args = args 20 | if args.display: 21 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 22 | cv2.resizeWindow("test", args.display_width, args.display_height) 23 | device = torch.device( 24 | 'cuda') if torch.cuda.is_available() else torch.device('cpu') 25 | self.vdo = cv2.VideoCapture() 26 | self.yolo3 = InferYOLOv3(args.yolo_cfg, 27 | args.img_size, 28 | args.yolo_weights, 29 | args.data_cfg, 30 | device, 31 | conf_thres=args.conf_thresh, 32 | nms_thres=args.nms_thresh) 33 | # self.deepsort = DeepSort(args.deepsort_checkpoint) 34 | self.mot_tracker_sort = Sort() 35 | self.class_names = self.yolo3.classes 36 | 37 | def __enter__(self): 38 | assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error" 39 | self.vdo.open(self.args.VIDEO_PATH) 40 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 41 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 42 | 43 | if self.args.save_path: 44 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 45 | self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, 46 | (self.im_width, self.im_height)) 47 | 48 | assert self.vdo.isOpened() 49 | return self 50 | 51 | def __exit__(self, exc_type, exc_value, exc_traceback): 52 | if exc_type: 53 | print(exc_type, exc_value, exc_traceback) 54 | 55 | def detect(self): 56 | frame_cnt = -1 57 | while self.vdo.grab(): 58 | frame_cnt += 1 59 | start = time.time() 60 | _, ori_im = self.vdo.retrieve() 61 | # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB) 62 | im = ori_im 63 | 64 | t1_begin = time.time() 65 | bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im) 66 | t1_end = time.time() 67 | 68 | t2_begin = time.time() 69 | if bbox_xxyy is not None: 70 | # select class cow 71 | # mask = cls_ids == 0 72 | # bbox_xxyy = bbox_xxyy[mask] 73 | 74 | # bbox_xxyy[:, 3:] *= 1.2 75 | # cls_conf = cls_conf[mask] 76 | 77 | # bbox_xcycwh = bbox_xxyy 78 | # print(" "*10, bbox_xcycwh.shape, cls_conf.shape) 79 | detections = [] 80 | for i in range(len(bbox_xxyy)): 81 | # print(bbox_xxyy[i][0].item(), bbox_xxyy[i][1].item(), 82 | # bbox_xxyy[i][2].item(), bbox_xxyy[i][3].item(), 83 | # cls_conf[i].tolist()) 84 | detections.append([ 85 | bbox_xxyy[i][0].item(), bbox_xxyy[i][1].item(), 86 | bbox_xxyy[i][2].item(), bbox_xxyy[i][3].item(), 87 | cls_conf[i].tolist() 88 | ]) 89 | # detections.append([*bbox_xcycwh[i].tolist(), cls_conf[i].tolist()]) 90 | # print("=" * 30, [*bbox_xcycwh[i], cls_conf[i]]) 91 | # print('-'*30, detections) 92 | detections = torch.tensor(detections) 93 | outputs = self.mot_tracker_sort.update(detections) 94 | if len(outputs) > 0: 95 | bbox_xyxy = outputs[:, :4] 96 | identities = outputs[:, -1] 97 | ori_im = draw_bboxes(ori_im, bbox_xyxy, identities) 98 | t2_end = time.time() 99 | 100 | end = time.time() 101 | print( 102 | "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" 103 
| % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin), 104 | (end - start), ((t1_end - t1_begin) * 100 / 105 | ((end - start))), (1 / (end - start)))) 106 | if self.args.display: 107 | cv2.imshow("test", ori_im) 108 | cv2.waitKey(1) 109 | 110 | if self.args.save_path: 111 | self.output.write(ori_im) 112 | 113 | 114 | def parse_args(): 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument("VIDEO_PATH", type=str) 117 | parser.add_argument("--yolo_cfg", 118 | type=str, 119 | default="cfg/yolov3-1cls.cfg") 120 | parser.add_argument("--yolo_weights", 121 | type=str, 122 | default="./weights/best.pt") 123 | parser.add_argument("--yolo_names", 124 | type=str, 125 | default="cfg/coco.names") 126 | parser.add_argument("--conf_thresh", type=float, default=0.5) 127 | parser.add_argument("--nms_thresh", type=float, default=0.4) 128 | parser.add_argument("--deepsort_checkpoint", 129 | type=str, 130 | default="deep_sort/deep/checkpoint/best.pt") 131 | parser.add_argument("--max_dist", type=float, default=0.2) 132 | parser.add_argument("--ignore_display", 133 | dest="display", 134 | action="store_false") 135 | parser.add_argument("--display_width", type=int, default=800) 136 | parser.add_argument("--display_height", type=int, default=600) 137 | parser.add_argument("--save_path", type=str, default="demo.avi") 138 | parser.add_argument("--data_cfg", 139 | type=str, 140 | default="data/voc_small.data") 141 | parser.add_argument("--img_size", type=int, default=416, help="img size") 142 | 143 | return parser.parse_args() 144 | 145 | 146 | if __name__ == "__main__": 147 | args = parse_args() 148 | with Detector(args) as det: 149 | det.detect() 150 | 151 | os.system("ffmpeg -y -i demo.avi -r 10 -b:a 32k %s_output.mp4" % 152 | (os.path.basename(args.VIDEO_PATH).split('.')[0])) 153 | -------------------------------------------------------------------------------- /sort/README.md: -------------------------------------------------------------------------------- 1 | SORT 2 | ===== 3 | 4 | A simple online and realtime tracking algorithm for 2D multiple object tracking in video sequences. 5 | See an example [video here](https://motchallenge.net/movies/ETH-Linthescher-SORT.mp4). 6 | 7 | By Alex Bewley 8 | 9 | ### Introduction 10 | 11 | SORT is a barebones implementation of a visual multiple object tracking framework based on rudimentary data association and state estimation techniques. It is designed for online tracking applications where only past and current frames are available and the method produces object identities on the fly. While this minimalistic tracker doesn't handle occlusion or re-entering objects its purpose is to serve as a baseline and testbed for the development of future trackers. 12 | 13 | SORT was initially described in an [arXiv tech report](http://arxiv.org/abs/1602.00763). At the time of the initial publication, SORT was ranked the best *open source* multiple object tracker on the [MOT benchmark](https://motchallenge.net/results/2D_MOT_2015/). 14 | 15 | This code has been tested on Mac OSX 10.10, and Ubuntu 14.04, with Python 2.7 (anaconda). 16 | 17 | **Note:** A significant proportion of SORT's accuracy is attributed to the detections. 18 | For your convenience, this repo also contains *Faster* RCNN detections for the MOT benchmark sequences in the [benchmark format](https://motchallenge.net/instructions/). 
To run the detector yourself please see the original [*Faster* RCNN project](https://github.com/ShaoqingRen/faster_rcnn) or the python reimplementation of [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by Ross Girshick. 19 | 20 | **Also see:** 21 | A new and improved version of SORT with a Deep Association Metric implemented in tensorflow is available at [https://github.com/nwojke/deep_sort](https://github.com/nwojke/deep_sort) . 22 | 23 | ### License 24 | 25 | SORT is released under the GPL License (refer to the LICENSE file for details) to promote the open use of the tracker and future improvements. If you require a permissive license contact Alex (alex@bewley.ai). 26 | 27 | ### Citing SORT 28 | 29 | If you find this repo useful in your research, please consider citing: 30 | 31 | @inproceedings{Bewley2016_sort, 32 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 33 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 34 | title={Simple online and realtime tracking}, 35 | year={2016}, 36 | pages={3464-3468}, 37 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 38 | doi={10.1109/ICIP.2016.7533003} 39 | } 40 | 41 | 42 | ### Dependencies: 43 | 44 | This code makes use of the following packages: 45 | 1. [`scikit-learn`](http://scikit-learn.org/stable/) 46 | 0. [`scikit-image`](http://scikit-image.org/download) 47 | 0. [`FilterPy`](https://github.com/rlabbe/filterpy) 48 | 49 | To install required dependencies run: 50 | ``` 51 | $ pip install -r requirements.txt 52 | ``` 53 | 54 | 55 | ### Demo: 56 | 57 | To run the tracker with the provided detections: 58 | 59 | ``` 60 | $ cd path/to/sort 61 | $ python sort.py 62 | ``` 63 | 64 | To display the results you need to: 65 | 66 | 0. Download the [2D MOT 2015 benchmark dataset](https://motchallenge.net/data/2D_MOT_2015/#download) 67 | 0. Create a symbolic link to the dataset 68 | ``` 69 | $ ln -s /path/to/MOT2015_challenge/data/2DMOT2015 mot_benchmark 70 | ``` 71 | 0. Run the demo with the ```--display``` flag 72 | ``` 73 | $ python sort.py --display 74 | ``` 75 | 76 | 77 | ### Main Results 78 | 79 | Using the [MOT challenge devkit](https://motchallenge.net/devkit/) the method produces the following results (as described in the paper). 80 | 81 | Sequence | Rcll | Prcn | FAR | GT MT PT ML| FP FN IDs FM| MOTA MOTP MOTAL 82 | --------------- |:----:|:----:|:----:|:-------------:|:-------------------:|:------------------: 83 | TUD-Campus | 68.5 | 94.3 | 0.21 | 8 6 2 0| 15 113 6 9| 62.7 73.7 64.1 84 | ETH-Sunnyday | 77.5 | 81.9 | 0.90 | 30 11 16 3| 319 418 22 54| 59.1 74.4 60.3 85 | ETH-Pedcross2 | 51.9 | 90.8 | 0.39 | 133 17 60 56| 330 3014 77 103| 45.4 74.8 46.6 86 | ADL-Rundle-8 | 44.3 | 75.8 | 1.47 | 28 6 16 6| 959 3781 103 211| 28.6 71.1 30.1 87 | Venice-2 | 42.5 | 64.8 | 2.75 | 26 7 9 10| 1650 4109 57 106| 18.6 73.4 19.3 88 | KITTI-17 | 67.1 | 92.3 | 0.26 | 9 1 8 0| 38 225 9 16| 60.2 72.3 61.3 89 | *Overall* | 49.5 | 77.5 | 1.24 | 234 48 111 75| 3311 11660 274 499| 34.0 73.3 35.1 90 | 91 | 92 | ### Using SORT in your own project 93 | 94 | Below is the gist of how to instantiate and update SORT. See the ['__main__'](https://github.com/abewley/sort/blob/master/sort.py#L239) section of [sort.py](https://github.com/abewley/sort/blob/master/sort.py#L239) for a complete example. 
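Before that gist, here is a self-contained sketch of the same loop as it is used in the surrounding deep_sort_yolov3_pytorch project, where detections come from the YOLOv3 wrapper (`InferYOLOv3.predict` in `predict.py`) as `(x1, y1, x2, y2)` boxes plus confidences. The detector call and the numbers below are illustrative only; the contract that matters is that `Sort.update()` receives one `[x1, y1, x2, y2, score]` row per detection and returns rows whose last column is the track id:

```
import numpy as np
from sort import Sort

mot_tracker = Sort()  # default max_age / min_hits

# Pretend these came from the detector for one frame
# (in this repo: bbox, cls_conf, cls_id = InferYOLOv3.predict(frame)).
boxes = np.array([[100., 80., 180., 220.],   # x1, y1, x2, y2
                  [300., 60., 380., 200.]])
scores = np.array([0.92, 0.87])

# Sort.update() expects an (N, 5) array: [x1, y1, x2, y2, score] per row.
detections = np.hstack([boxes, scores[:, None]])

tracks = mot_tracker.update(detections)
# Each returned row is [x1, y1, x2, y2, track_id].
for x1, y1, x2, y2, track_id in tracks:
    print('track %d: (%.0f, %.0f, %.0f, %.0f)' % (track_id, x1, y1, x2, y2))
```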
95 | 96 | from sort import * 97 | 98 | # create instance of SORT 99 | mot_tracker = Sort() 100 | 101 | # get detections 102 | ... 103 | 104 | # update SORT 105 | track_bbs_ids = mot_tracker.update(detections) 106 | 107 | # track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column) 108 | ... 109 | 110 | 111 | -------------------------------------------------------------------------------- /sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/sort/__init__.py -------------------------------------------------------------------------------- /sort/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | filterpy==1.4.1 3 | numba==0.38.1 4 | scikit-image==0.14.0 5 | scikit-learn==0.19.1 6 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/utils/__init__.py -------------------------------------------------------------------------------- /utils/anchor_cluster.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | import xml.etree.ElementTree as ET 3 | import numpy as np 4 | import glob 5 | 6 | 7 | def iou(box, clusters): 8 | """ 9 | Compute the intersection-over-union (IOU) between one ground truth box and the k prior (anchor) boxes. 10 | box: tuple or array holding the width and height of the ground truth box. 11 | clusters: numpy array of shape (k, 2), where k is the number of anchor clusters. 12 | Returns: the IOU between the ground truth box and each anchor. 13 | """ 14 | x = np.minimum(clusters[:, 0], box[0]) 15 | y = np.minimum(clusters[:, 1], box[1]) 16 | if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0: 17 | raise ValueError("Box has no area") 18 | intersection = x * y 19 | box_area = box[0] * box[1] 20 | cluster_area = clusters[:, 0] * clusters[:, 1] 21 | iou_ = intersection / (box_area + cluster_area - intersection) 22 | return iou_ 23 | 24 | 25 | def avg_iou(boxes, clusters): 26 | """ 27 | Compute the mean of the best IOU between each ground truth box and the k anchors. 28 | """ 29 | return np.mean( 30 | [np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])]) 31 | 32 | 33 | def kmeans(boxes, k, dist=np.median): 34 | """ 35 | K-means clustering driven by the IOU metric. 36 | boxes: ground truth boxes of shape (r, 2), where r is the number of ground truth boxes. 37 | k: number of anchors. 38 | dist: aggregation function used to update the cluster centres. 39 | Returns: the k anchor boxes as an array of shape (k, 2). 40 | """ 41 | # r mentioned above, i.e. the number of ground truth boxes 42 | rows = boxes.shape[0] 43 | # distance matrix: distance from each ground truth box to the k anchors 44 | distances = np.empty((rows, k)) 45 | # index of the nearest anchor for each ground truth box in the previous iteration 46 | last_clusters = np.zeros((rows, )) 47 | # seed the random number generator 48 | np.random.seed() 49 | 50 | # initialize the k cluster centres by randomly picking k of the r ground truth boxes 51 | clusters = boxes[np.random.choice(rows, k, replace=False)] 52 | # start clustering 53 | while True: 54 | # distance from each ground truth box to the k anchors, computed as 1 - IOU(box, anchor) 55 | for row in range(rows): 56 | distances[row] = 1 - iou(boxes[row], clusters) 57 | # for each ground truth box, pick the closest anchor and store its index 58 | nearest_clusters = np.argmin(distances, axis=1) 59 | # stop when the nearest-anchor assignment no longer changes 60 | if (last_clusters == nearest_clusters).all(): 61 | break 62 | # update each cluster centre with dist (median by default) of the boxes assigned to it 63 | for cluster in range(k): 64 | clusters[cluster] = dist(boxes[nearest_clusters == cluster], 65 | axis=0) 66 | # remember the current assignment for the next iteration 67 | last_clusters = nearest_clusters 68 | 69 | return clusters 70 | 71 | 72 | # 
Load your own dataset: all that is needed are the xml annotation files produced by labelImg 73 | def load_dataset(path): 74 | dataset = [] 75 | for xml_file in glob.glob("{}/*xml".format(path)): 76 | tree = ET.parse(xml_file) 77 | # image height 78 | height = int(tree.findtext("./size/height")) 79 | # image width 80 | width = int(tree.findtext("./size/width")) 81 | 82 | for obj in tree.iter("object"): 83 | # normalize the box coordinates by the image size 84 | xmin = int(obj.findtext("bndbox/xmin")) / width 85 | ymin = int(obj.findtext("bndbox/ymin")) / height 86 | xmax = int(obj.findtext("bndbox/xmax")) / width 87 | ymax = int(obj.findtext("bndbox/ymax")) / height 88 | xmin = np.float64(xmin) 89 | ymin = np.float64(ymin) 90 | xmax = np.float64(xmax) 91 | ymax = np.float64(ymax) 92 | if xmax == xmin or ymax == ymin: 93 | print(xml_file) # report degenerate boxes 94 | # store the box width and height in dataset; kmeans is run on these to obtain the anchors 95 | dataset.append([xmax - xmin, ymax - ymin]) 96 | return np.array(dataset) 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | ANNOTATIONS_PATH = r"I:\Dataset\datasets1\VOC2007\Annotations" 102 | #"/home/dongpeijie/datasets/voc2007_for_yolo_torch-master/Annotations" # folder that contains the xml files 103 | CLUSTERS = 9 # number of clusters = number of anchors 104 | INPUTDIM = 416 # network input size 105 | 106 | data = load_dataset(ANNOTATIONS_PATH) 107 | 108 | out = kmeans(data, k=CLUSTERS) 109 | 110 | print('Boxes:') 111 | # print(np.array(out) * INPUTDIM) 112 | 113 | anchors = np.array(out)*INPUTDIM 114 | 115 | anchors = np.sort(anchors, axis=0) 116 | 117 | print("=================") 118 | for i in range(len(anchors)): 119 | print("%.2f,%.2f, " % (anchors[i][0], anchors[i][1]), end="") 120 | 121 | print("\n=================") 122 | 123 | print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100)) 124 | final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist() 125 | print("Before Sort Ratios:\n {}".format(final_anchors)) 126 | print("After Sort Ratios:\n {}".format(sorted(final_anchors))) -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | r = sum([os.system(x) for x in s]) # run commands and accumulate the return codes (0 on success) 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | r = os.system(s) 32 | 33 | # Error check 34 | if r != 0: 35 | os.system('rm ' + name) # remove partial downloads 36 | print('ERROR: Download failure ') 37 | return r 38 | 39 | # Unzip if archive 40 | if name.endswith('.zip'): 41 | print('unzipping... ', end='') 42 | os.system('unzip -q %s' % name) # unzip 43 | os.remove(name) # remove zip to free space 44 | 45 | print('Done (%.1fs)' % (time.time() - t)) 46 | return r 47 | 48 | 49 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 50 | # Uploads a file to a bucket 51 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 52 | 53 | storage_client = storage.Client() 54 | bucket = storage_client.get_bucket(bucket_name) 55 | blob = bucket.blob(destination_blob_name) 56 | 57 | blob.upload_from_filename(source_file_name) 58 | 59 | print('File {} uploaded to {}.'.format( 60 | source_file_name, 61 | destination_blob_name)) 62 | 63 | 64 | def download_blob(bucket_name, source_blob_name, destination_file_name): 65 | # Downloads a blob from a bucket 66 | storage_client = storage.Client() 67 | bucket = storage_client.get_bucket(bucket_name) 68 | blob = bucket.blob(source_blob_name) 69 | 70 | blob.download_to_filename(destination_file_name) 71 | 72 | print('Blob {} downloaded to {}.'.format( 73 | source_blob_name, 74 | destination_file_name)) 75 | -------------------------------------------------------------------------------- /utils/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | ''' 5 | The values given in shape must be odd, so that padding = (shape - 1) // 2 keeps the feature map size unchanged. 6 | 7 | [spatialmaxpool] 8 | # 52x52 26x26 13x13 9 | from=75, 70, 62 10 | shape=13, 13, 13 11 | out_plane = 128 12 | ''' 13 | class SpatialMaxpool(nn.Module): 14 | def __init__(self, shapes, filters, out_plane=128): 15 | # shapes: type=list 16 | # filters: type=list 17 | super(SpatialMaxpool, self).__init__() 18 | 19 | self.spp1 = nn.MaxPool2d( # 52 20 | kernel_size=shapes[0], 21 | stride=1, 22 | padding=int((shapes[0] - 1) // 2)) 23 | self.conv1x1_1 = nn.Conv2d(filters[0], out_plane, kernel_size=3, # 3x3 stride-2 conv: downsamples 52x52 to 26x26 to match the other branches 24 | stride=2, 25 | padding=1) 26 | 27 | self.spp2 = nn.MaxPool2d( # 26 28 | kernel_size=shapes[1], 29 | stride=1, 30 | padding=int((shapes[1] - 1) // 2)) 31 | self.conv1x1_2 = nn.Conv2d(filters[1], out_plane, kernel_size=1, 32 | stride=1, 33 | padding=0) 34 | 35 | self.spp3 = nn.MaxPool2d( # 13 36 | kernel_size=shapes[2], 37 | stride=1, 38 | padding=int((shapes[2] - 1) // 2)) 39 | self.conv1x1_3 = nn.Conv2d(filters[2], out_plane, kernel_size=1, 40 | stride=1, 41 | padding=0) 42 | 43 | self.us_spp3 = nn.Upsample(scale_factor=2, mode='nearest') 44 | 45 | def forward(self, x1, x2, x3): 46 | # x1: 52x52, x2: 26x26, x3: 13x13 47 | out1 = self.conv1x1_1(self.spp1(x1)) 48 | out2 = self.conv1x1_2(self.spp2(x2)) 49 | out3 = self.us_spp3(self.conv1x1_3(self.spp3(x3))) 50 | return 
out1+out2+out3 51 | 52 | 53 | ''' 54 | Not a standard SE block, but a special SE variant. 55 | # layer=80 56 | [se] 57 | # attention feature 58 | from=62, -1 59 | reduction=4 60 | out_plane=256 # must stay consistent with the value used above 61 | ''' 62 | 63 | class SpecialSE(nn.Module): 64 | def __init__(self, in_plane, out_plane, reduction=4): 65 | super(SpecialSE, self).__init__() 66 | self.out_plane = out_plane 67 | self.gap = nn.AdaptiveAvgPool2d(1) 68 | self.fc = nn.Sequential( 69 | nn.Linear(in_plane, in_plane // reduction, bias=False), # use the reduction argument instead of a hard-coded 4 70 | nn.ReLU(inplace=True), 71 | nn.Linear(in_plane // reduction, out_plane, bias=False), 72 | nn.Sigmoid() 73 | ) 74 | 75 | def forward(self, attention, y): 76 | # apply the attention extracted from x to y 77 | b, c, _, _ = attention.size() 78 | attention = self.gap(attention).view(b, c) 79 | channel_attention = self.fc(attention).view(b, self.out_plane, 1, 1) 80 | return channel_attention * y 81 | 82 | 83 | if __name__ == "__main__": 84 | model=SpatialMaxpool(shapes=[13, 13, 13], filters=[128, 128, 512],out_plane=256) 85 | 86 | x1 = torch.zeros((3, 128, 52, 52)) 87 | x2 = torch.zeros((3, 128, 26, 26)) 88 | x3 = torch.zeros((3, 512, 13, 13)) 89 | 90 | print(model(x1,x2,x3).shape) 91 | 92 | # # attention, feature 93 | # model = SpecialSE(512, 256, reduction=4) 94 | 95 | # x1 = torch.zeros(4, 512, 13, 13) 96 | # y1 = torch.zeros(4, 256, 26, 26) 97 | 98 | # # attention, feature 99 | # print(model(x1, y1).shape) 100 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions. path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | # add cfg/ prefix if omitted 11 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): 12 | path = 'cfg' + os.sep + path 13 | 14 | with open(path, 'r') as f: 15 | lines = f.read().split('\n') 16 | lines = [x for x in lines if x and not x.startswith('#')] 17 | lines = [x.rstrip().lstrip() 18 | for x in lines] # get rid of fringe whitespaces 19 | mdefs = [] # module definitions 20 | for line in lines: 21 | if line.startswith('['): # This marks the start of a new block 22 | mdefs.append({}) 23 | mdefs[-1]['type'] = line[1:-1].rstrip() 24 | if mdefs[-1]['type'] == 'convolutional': 25 | # pre-populate with zeros (may be overwritten later) 26 | mdefs[-1]['batch_normalize'] = 0 27 | else: 28 | key, val = line.split("=") 29 | key = key.rstrip() 30 | 31 | if 'anchors' in key: 32 | # np anchors 33 | mdefs[-1][key] = np.array([float(x) 34 | for x in val.split(',')]).reshape((-1, 2)) 35 | else: 36 | mdefs[-1][key] = val.strip() 37 | 38 | # Check all fields are supported 39 | supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', 40 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 41 | 'stride_x', 'stride_y', 'reduction', 'out_plane', 'shape'] 42 | 43 | f = [] # fields 44 | for x in mdefs[1:]: 45 | [f.append(k) for k in x if k not in f] 46 | u = [x for x in f if x not in supported] # unsupported fields 47 | assert not any( 48 | u), "Unsupported fields %s in %s. 
See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 49 | 50 | return mdefs 51 | 52 | 53 | def parse_data_cfg(path): 54 | # Parses the data configuration file 55 | # add data/ prefix if omitted 56 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): 57 | path = 'data' + os.sep + path 58 | 59 | with open(path, 'r') as f: 60 | lines = f.readlines() 61 | 62 | options = dict() 63 | for line in lines: 64 | line = line.strip() 65 | if line == '' or line.startswith('#'): 66 | continue 67 | key, val = line.split('=') 68 | options[key.strip()] = val.strip() 69 | 70 | return options 71 | -------------------------------------------------------------------------------- /utils/process_darklabel.py: -------------------------------------------------------------------------------- 1 | import os 2 | ''' 3 | gt.txt: 4 | --------- 5 | frame (counted from 1), id, box (left, top, w, h), ignore=1 (do not ignore), class=1 (counted from 1), visibility 6 | 1,1,1363,569,103,241,1,1,0.86014 7 | 2,1,1362,568,103,241,1,1,0.86173 8 | 3,1,1362,568,103,241,1,1,0.86173 9 | 4,1,1362,568,103,241,1,1,0.86173 10 | 11 | cutout24_gt.txt 12 | --- 13 | frame (counted from 0), number of objects, id (counted from 0), box (x1, y1, x2, y2), class=null 14 | 0,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 15 | 1,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 16 | 2,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 17 | ''' 18 | 19 | 20 | def xyxy2xywh(x): 21 | # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] 22 | # y = torch.zeros_like(x) if isinstance(x, 23 | # torch.Tensor) else np.zeros_like(x) 24 | y = [0, 0, 0, 0] 25 | 26 | y[0] = (x[0] + x[2]) / 2 27 | y[1] = (x[1] + x[3]) / 2 28 | y[2] = x[2] - x[0] 29 | y[3] = x[3] - x[1] 30 | return y 31 | 32 | def process_darklabel(video_label_path, mot_label_path): 33 | f = open(video_label_path, "r") 34 | f_o = open(mot_label_path, "w") 35 | 36 | contents = f.readlines() 37 | 38 | for line in contents: 39 | line = line[:-1] 40 | num_list = [num for num in line.split(',')] 41 | 42 | frame_id = int(num_list[0]) + 1 43 | total_num = int(num_list[1]) 44 | 45 | base = 2 46 | 47 | for i in range(total_num): 48 | 49 | print(base, base + i * 6, base + i * 6 + 4) 50 | 51 | _id = int(num_list[base + i * 6]) + 1 52 | _box_x1 = int(num_list[base + i * 6 + 1]) 53 | _box_y1 = int(num_list[base + i * 6 + 2]) 54 | _box_x2 = int(num_list[base + i * 6 + 3]) 55 | _box_y2 = int(num_list[base + i * 6 + 4]) 56 | 57 | y = xyxy2xywh([_box_x1, _box_y1, _box_x2, _box_y2]) 58 | 59 | write_line = "%d,%d,%d,%d,%d,%d,1,1,1\n" % (frame_id, _id, y[0], 60 | y[1], y[2], y[3]) 61 | 62 | f_o.write(write_line) 63 | 64 | f.close() 65 | f_o.close() 66 | 67 | if __name__ == "__main__": 68 | root_dir = "./data/videosample" 69 | 70 | for item in os.listdir(root_dir): 71 | full_path = os.path.join(root_dir, item) 72 | 73 | video_path = os.path.join(full_path, item+".mp4") 74 | video_label_path = os.path.join(full_path, item + "_gt.txt") 75 | mot_label_path = os.path.join(full_path, "gt.txt") 76 | process_darklabel(video_label_path, mot_label_path) 77 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove 
randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False, batch_size=None): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 29 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 31 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 32 | for i in range(0, ng): 33 | if i == 1: 34 | s = ' ' * len(s) 35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 36 | (s, i, x[i].name, x[i].total_memory / c)) 37 | else: 38 | print('Using CPU') 39 | 40 | print('') # skip a line 41 | return torch.device('cuda:0' if cuda else 'cpu') 42 | 43 | 44 | def fuse_conv_and_bn(conv, bn): 45 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 46 | with torch.no_grad(): 47 | # init 48 | fusedconv = torch.nn.Conv2d(conv.in_channels, 49 | conv.out_channels, 50 | kernel_size=conv.kernel_size, 51 | stride=conv.stride, 52 | padding=conv.padding, 53 | bias=True) 54 | 55 | # prepare filters 56 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 57 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 58 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 59 | 60 | # prepare spatial bias 61 | if conv.bias is not None: 62 | b_conv = conv.bias 63 | else: 64 | b_conv = torch.zeros(conv.weight.size(0)) 65 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 66 | fusedconv.bias.copy_(b_conv + b_bn) 67 | 68 | return fusedconv 69 | 70 | 71 | def model_info(model, report='summary'): 72 | # Plots a line-by-line description of a PyTorch model 73 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 74 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 75 | if report is 'full': 76 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 77 | for i, (name, p) in enumerate(model.named_parameters()): 78 | name = name.replace('module_list.', '') 79 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 80 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 81 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 82 | 83 | 84 | def load_classifier(name='resnet101', n=2): 85 | # Loads a pretrained model reshaped to n-class output 86 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 87 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 88 | 89 | # Display model properties 90 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 91 
| print(x + ' =', eval(x)) 92 | 93 | # Reshape output to n classes 94 | filters = model.last_linear.weight.shape[1] 95 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 96 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 97 | model.last_linear.out_features = n 98 | return model 99 | 100 | 101 | from collections import defaultdict 102 | from torch.optim import Optimizer 103 | 104 | 105 | class Lookahead(Optimizer): 106 | def __init__(self, optimizer, k=5, alpha=0.5): 107 | self.optimizer = optimizer 108 | self.k = k 109 | self.alpha = alpha 110 | self.param_groups = self.optimizer.param_groups 111 | self.state = defaultdict(dict) 112 | self.fast_state = self.optimizer.state 113 | for group in self.param_groups: 114 | group["counter"] = 0 115 | 116 | def update(self, group): 117 | for fast in group["params"]: 118 | param_state = self.state[fast] 119 | if "slow_param" not in param_state: 120 | param_state["slow_param"] = torch.zeros_like(fast.data) 121 | param_state["slow_param"].copy_(fast.data) 122 | slow = param_state["slow_param"] 123 | slow += (fast.data - slow) * self.alpha 124 | fast.data.copy_(slow) 125 | 126 | def update_lookahead(self): 127 | for group in self.param_groups: 128 | self.update(group) 129 | 130 | def step(self, closure=None): 131 | loss = self.optimizer.step(closure) 132 | for group in self.param_groups: 133 | if group["counter"] == 0: 134 | self.update(group) 135 | group["counter"] += 1 136 | if group["counter"] >= self.k: 137 | group["counter"] = 0 138 | return loss 139 | 140 | def state_dict(self): 141 | fast_state_dict = self.optimizer.state_dict() 142 | slow_state = { 143 | (id(k) if isinstance(k, torch.Tensor) else k): v 144 | for k, v in self.state.items() 145 | } 146 | fast_state = fast_state_dict["state"] 147 | param_groups = fast_state_dict["param_groups"] 148 | return { 149 | "fast_state": fast_state, 150 | "slow_state": slow_state, 151 | "param_groups": param_groups, 152 | } 153 | 154 | def load_state_dict(self, state_dict): 155 | slow_state_dict = { 156 | "state": state_dict["slow_state"], 157 | "param_groups": state_dict["param_groups"], 158 | } 159 | fast_state_dict = { 160 | "state": state_dict["fast_state"], 161 | "param_groups": state_dict["param_groups"], 162 | } 163 | super(Lookahead, self).load_state_dict(slow_state_dict) 164 | self.optimizer.load_state_dict(fast_state_dict) 165 | self.fast_state = self.optimizer.state 166 | 167 | def add_param_group(self, param_group): 168 | param_group["counter"] = 0 169 | self.optimizer.add_param_group(param_group) 170 | -------------------------------------------------------------------------------- /utils/tsne_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from matplotlib import offsetbox 7 | from sklearn import (manifold, datasets, decomposition, ensemble, 8 | discriminant_analysis, random_projection, neighbors) 9 | 10 | 11 | class tSNE_Visual(): 12 | def __init__(self): 13 | super(tSNE_Visual, self).__init__() 14 | self.parser = argparse.ArgumentParser() 15 | self.parser.add_argument('--Input', 16 | type=str, 17 | default='data', 18 | help='the path of target dataset') 19 | self.parser.add_argument('--Size', 20 | type=int, 21 | default=400, 22 | help='the size of every class') 23 | self.parser.add_argument('--Zoom', 24 | type=float, 25 | default=0.1, 26 | help='the size of every class') 27 | 
self.parser.add_argument('--Output', 28 | type=str, 29 | default='t-SNE1.png', 30 | help='the out path of result image') 31 | 32 | def parse(self): 33 | self.opt = self.parser.parse_args() 34 | args = vars(self.opt) 35 | print('\n--- load options ---') 36 | for name, value in sorted(args.items()): 37 | print('%s: %s' % (str(name), str(value))) 38 | return self.opt 39 | 40 | def plot_embedding(self, X, _output, zoom, title=None): 41 | x_min, x_max = np.min(X, 0), np.max(X, 0) 42 | X = (X - x_min) / (x_max - x_min) 43 | 44 | plt.figure(figsize=(20, 20)) 45 | ax = plt.subplot(111) 46 | 47 | if hasattr(offsetbox, 'AnnotationBbox'): 48 | # only print thumbnails with matplotlib > 1.0 49 | shown_images = np.array([[1., 1.]]) # just something big 50 | for i in range(X.shape[0]): 51 | dist = np.sum((X[i] - shown_images)**2, 1) 52 | #if np.min(dist) < 4e-3: 53 | # don't show points that are too close 54 | # continue 55 | shown_images = np.r_[shown_images, [X[i]]] 56 | imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage( 57 | real_imgs[i], zoom=0.12, cmap=plt.cm.gray_r), 58 | X[i], 59 | pad=0) 60 | ax.add_artist(imagebox) 61 | '''for i in range(X.shape[0]): 62 | #cls = plt.text(X[i, 0], X[i, 1], _classes[y[i][0].astype(int)-1], 63 | cls = plt.text(X[i, 0], X[i, 1], str(y[i].astype(int)), 64 | #cls = plt.text(X[i, 0], X[i, 1], '★', 65 | color=_colors[int(y[i][0]-1)], 66 | fontdict={'weight': 'bold', 'size': 12}) 67 | cls.set_zorder(20) ''' 68 | 69 | ax.spines['top'].set_visible(False) 70 | ax.spines['right'].set_visible(False) 71 | ax.spines['bottom'].set_visible(False) 72 | ax.spines['left'].set_visible(False) 73 | plt.xticks([]), plt.yticks([]) 74 | if title is not None: 75 | plt.title(title) 76 | plt.savefig(_output) 77 | 78 | 79 | if __name__ == '__main__': 80 | # Disable the GUI matplotlib 81 | plt.switch_backend('agg') 82 | 83 | tsne_visual = tSNE_Visual() 84 | opts = tsne_visual.parse() 85 | dataroot = opts.Input 86 | _size = opts.Size 87 | _output = opts.Output 88 | _zoom = opts.Zoom 89 | 90 | dirs = [] 91 | for item in os.listdir(dataroot): 92 | if ('.ipynb_checkpoints' not in item): 93 | dirs.append(item) 94 | 95 | _len = len(dirs) 96 | y = np.zeros((_size * _len, 1)) 97 | for i in range(_len): 98 | y[i * _size:(i + 1) * _size] = i + 1 99 | 100 | imgs = [] 101 | real_imgs = [] 102 | for i in range(_len): 103 | single_cls = [] 104 | path = os.path.join(dataroot, dirs[i]) 105 | dataset_list = os.listdir(path) 106 | cnt = 0 107 | for item in dataset_list: 108 | if (cnt == _size): 109 | break 110 | if ('.ipynb_checkpoints' in item): 111 | continue 112 | data_path = os.path.join(path, item) 113 | temp = cv2.imread(data_path) 114 | real_img = cv2.cvtColor(temp, cv2.COLOR_BGR2RGB) 115 | imgs.append(temp.reshape(-1)) 116 | real_imgs.append(real_img) 117 | cnt = cnt + 1 118 | np_imgs = np.array(imgs) 119 | real_imgs = np.array(real_imgs) 120 | 121 | tsne = manifold.TSNE(n_components=2, init='random', random_state=0) 122 | print(np_imgs.shape) 123 | result = tsne.fit_transform(np_imgs) 124 | 125 | tsne_visual.plot_embedding(X=result, _output=_output, zoom=_zoom) -------------------------------------------------------------------------------- /utils/utils_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 
| (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | # def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | # ''' 19 | # draw box of an id 20 | # ''' 21 | # x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # # set color and label text 23 | # color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | # label = '{} {}'.format(cls_name, identity) 25 | # # box text and bar 26 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | # return img 31 | 32 | 33 | def plot_one_box(x, ori_img, color=None, label=None, line_thickness=None): 34 | # Plots one bounding box on image img 35 | img = ori_img 36 | tl = line_thickness or round( 37 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 38 | color = color or [int(np.random.randint(0, 255)) for _ in range(3)] # use np.random: the random module is not imported in this file 39 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 40 | cv2.rectangle(img, c1, c2, color, thickness=tl) 41 | if label: 42 | tf = max(tl - 1, 1) # font thickness 43 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 44 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 45 | cv2.rectangle(img, c1, c2, color, -1) # filled 46 | cv2.putText(img, 47 | label, (c1[0], c1[1] - 2), 48 | 0, 49 | tl / 3, [225, 255, 255], 50 | thickness=tf, 51 | lineType=cv2.LINE_AA) 52 | return img 53 | 54 | 55 | ''' 56 | Drawing helper used by Deep SORT: boxes are drawn directly on the original image. 57 | ''' 58 | def draw_bboxes(ori_img, bbox, identities=None, offset=(0,0)): 59 | img = ori_img 60 | for i,box in enumerate(bbox): 61 | x1,y1,x2,y2 = [int(i) for i in box] 62 | x1 += offset[0] 63 | x2 += offset[0] 64 | y1 += offset[1] 65 | y2 += offset[1] 66 | # box text and bar 67 | id = int(identities[i]) if identities is not None else 0 68 | color = COLORS_10[id%len(COLORS_10)] 69 | label = '{}{:d}'.format("", id) 70 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 71 | img = plot_one_box([x1,y1,x2,y2], img, color, label) 72 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 73 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 74 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 75 | return img 76 | 77 | 78 | 79 | 80 | 81 | def softmax(x): 82 | assert isinstance(x, np.ndarray), "expect x to be a numpy array" 83 | x_exp = np.exp(x*5) 84 | return x_exp/x_exp.sum() 85 | 86 | def softmin(x): 87 | assert isinstance(x, np.ndarray), "expect x to be a numpy array" 88 | x_exp = np.exp(-x) 89 | return 
x_exp/x_exp.sum() 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | x = np.arange(10)/10. 95 | x = np.array([0.5,0.5,0.5,0.6,1.]) 96 | y = softmax(x) 97 | z = softmin(x) 98 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /utils/visdom.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import time 3 | import numpy as np 4 | 5 | 6 | class Visualizer(object): 7 | def __init__(self, env='default', **kwargs): 8 | self.vis = visdom.Visdom(env=env, **kwargs) 9 | self.index = {} 10 | 11 | def plot_many_stack(self, d): 12 | ''' 13 | self.plot('loss',1.00) 14 | ''' 15 | name = list(d.keys()) 16 | name_total = " ".join(name) 17 | x = self.index.get(name_total, 0) 18 | val = list(d.values()) 19 | if len(val) == 1: 20 | y = np.array(val) 21 | else: 22 | y = np.array(val).reshape(-1, len(val)) 23 | # print(x) 24 | self.vis.line( 25 | Y=y, 26 | X=np.ones(y.shape) * x, 27 | win=str(name_total), # unicode 28 | opts=dict(legend=name, title=name_total), 29 | update=None if x == 0 else 'append') 30 | self.index[name_total] = x + 1 -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | # mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | # wget -c https://pjreddie.com/media/files/yolov3.weights 8 | # wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | # wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | # wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | # new method 22 | python3 -c "from models import *; 23 | attempt_download('weights/yolov3.pt'); 24 | attempt_download('weights/yolov3-spp.pt')" 25 | -------------------------------------------------------------------------------- /weights/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf yolov3 weights coco 5 | git clone https://github.com/ultralytics/yolov3 6 | bash yolov3/weights/download_yolov3_weights.sh && cp -r weights yolov3 7 | bash yolov3/data/get_coco_dataset.sh 8 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo reboot now 10 | 11 | # Re-clone 12 | rm -rf yolov3 13 | git clone https://github.com/ultralytics/yolov3 # master 14 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 15 | cp -r weights yolov3 16 | cp -r cocoapi/PythonAPI/pycocotools yolov3 17 | cd yolov3 18 | 19 | # Train 20 | python3 train.py 21 | 22 | # Resume 23 | python3 train.py --resume 24 | 25 | # Detect 26 | python3 detect.py 27 | 28 | # Test 29 | python3 test.py --save-json 30 | 31 | # Git pull 32 | git pull https://github.com/ultralytics/yolov3 # master 33 | git pull https://github.com/ultralytics/yolov3 test # branch 34 | 35 | # Test Darknet training 36 | python3 test.py --weights ../darknet/backup/yolov3.backup 37 | 38 | # Copy latest.pt TO bucket 39 | gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics 40 | 41 | # Copy latest.pt FROM bucket 42 | gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt 43 | wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt 44 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 45 | 46 | # Reproduce tutorials 47 | rm results*.txt # WARNING: removes existing results 48 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt 49 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt 50 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt 51 | python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 52 | python3 -c "from utils import utils; utils.plot_results()" 53 | gsutil cp results*.txt gs://ultralytics 54 | gsutil cp results.png gs://ultralytics 55 | sudo shutdown 56 | 57 | # Unit tests 58 | rm -rf yolov3 59 | git clone https://github.com/ultralytics/yolov3 # master 60 | cp -r weights yolov3 && cd yolov3 61 | python3 detect.py # detect 62 | python3 test.py --data data/coco_32img.data # test 63 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 64 | 65 | # Debug/Development 66 | rm -rf yolov3 67 | git clone https://github.com/ultralytics/yolov3 # master 68 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 69 | cp -r cocoapi/PythonAPI/pycocotools yolov3 70 | cp -r weights yolov3 && cd yolov3 71 | python3 train.py --evolve --data data/coco_100img.data --num-workers 2 --epochs 30 72 | gsutil cp evolve.txt gs://ultralytics 73 | sudo shutdown 74 | --------------------------------------------------------------------------------