├── .gitignore ├── LICENSE ├── README.assets ├── 20200412221106751.png ├── 2020041418343015.png ├── 20200415100437671.png └── DeepSort.jpg ├── README.md ├── bak_results.txt ├── cfg ├── csresnext50-panet-spp.cfg ├── darknet19-3cls.cfg ├── darknet19-3l.cfg ├── mobile-yolo-cem.cfg ├── yolov3-1cls.cfg ├── yolov3-attention.cfg ├── yolov3-cbam.cfg ├── yolov3-dla.cfg ├── yolov3-se.cfg ├── yolov3-spp-1cls.cfg ├── yolov3-spp-3cls.cfg ├── yolov3-spp-matrix.cfg ├── yolov3-spp-pan-scale.cfg ├── yolov3-spp.cfg ├── yolov3-spp3.cfg ├── yolov3-tiny-1cls.cfg ├── yolov3-tiny-3cls.cfg ├── yolov3-tiny-cbam.cfg ├── yolov3-tiny.cfg ├── yolov3-tiny_3l.cfg ├── yolov3.cfg ├── yolov3_5l.cfg └── yolov3s.cfg ├── data ├── gcp.sh ├── get_coco2014.sh ├── get_coco2017.sh └── get_coco_dataset.sh ├── deep_sort.py ├── deep_sort ├── __init__.py ├── deep │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── checkpoint │ │ ├── .gitkeep │ │ └── ckpt.t7 │ ├── eval.py │ ├── feature_extractor.py │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── densenet.py │ │ ├── hacnn.py │ │ ├── inceptionresnetv2.py │ │ ├── inceptionv4.py │ │ ├── mlfn.py │ │ ├── mobilenetv2.py │ │ ├── mudeep.py │ │ ├── nasnet.py │ │ ├── original_model.py │ │ ├── osnet.py │ │ ├── osnet_ain.py │ │ ├── pcb.py │ │ ├── resnet.py │ │ ├── resnet_ibn_a.py │ │ ├── resnet_ibn_b.py │ │ ├── resnetmid.py │ │ ├── senet.py │ │ ├── shufflenet.py │ │ ├── shufflenetv2.py │ │ ├── squeezenet.py │ │ └── xception.py │ ├── oldfeature_extractor.py │ ├── train.py │ ├── train_wo_center.py │ └── utils │ │ ├── assign_train_val.py │ │ ├── center_loss.py │ │ ├── compute_mean_std.py │ │ ├── pre_deep.py │ │ ├── rename_all.py │ │ ├── tsne_vis.py │ │ └── visualize_actmap.py ├── deep_sort.py └── sort │ ├── __init__.py │ ├── detection.py │ ├── iou_matching.py │ ├── kalman_filter.py │ ├── linear_assignment.py │ ├── nn_matching.py │ ├── preprocessing.py │ ├── track.py │ └── tracker.py ├── detect.py ├── eval_mot.py ├── miniversion ├── cow.names ├── cv2MOT.py ├── models.py ├── predict.py ├── utils │ ├── __init__.py │ ├── adabound.py │ ├── datasets.py │ ├── gcp.sh │ ├── google_utils.py │ ├── parse_config.py │ ├── torch_utils.py │ ├── utils.py │ ├── utils_sort.py │ └── visdom.py └── yolov3-cbam.cfg ├── models.py ├── pre_mot.py ├── predict.py ├── sort.py ├── sort ├── LICENSE ├── README.md ├── __init__.py ├── data │ ├── ADL-Rundle-6 │ │ └── det.txt │ ├── ADL-Rundle-8 │ │ └── det.txt │ ├── ETH-Bahnhof │ │ └── det.txt │ ├── ETH-Pedcross2 │ │ └── det.txt │ ├── ETH-Sunnyday │ │ └── det.txt │ ├── KITTI-13 │ │ └── det.txt │ ├── KITTI-17 │ │ └── det.txt │ ├── PETS09-S2L1 │ │ └── det.txt │ ├── TUD-Campus │ │ └── det.txt │ ├── TUD-Stadtmitte │ │ └── det.txt │ └── Venice-2 │ │ └── det.txt ├── requirements.txt └── sort.py ├── test.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── anchor_cluster.py ├── datasets.py ├── gcp.sh ├── google_utils.py ├── layers.py ├── parse_config.py ├── process_darklabel.py ├── torch_utils.py ├── tsne_vis.py ├── utils.py ├── utils_sort.py └── visdom.py └── weights ├── download_yolov3_weights.sh └── gcp.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Repo-specific GitIgnore ---------------------------------------------------------------------------------------------- 2 | *.jpg 3 | *.png 4 | *.bmp 5 | *.tif 6 | *.heic 7 | *.JPG 8 | *.PNG 9 | *.TIF 10 | *.HEIC 11 | *.mp4 12 | *.mov 13 | *.MOV 14 | *.avi 15 | *.data 16 | *.json 17 | 18 | #*.cfg 19 | !cfg/yolov3*.cfg 20 | 21 | storage.googleapis.com 22 | 
runs/* 23 | data/* 24 | !README.assets/*.png 25 | !README.assets/*.jpg 26 | !data/samples/zidane.jpg 27 | !data/samples/bus.jpg 28 | !data/coco.names 29 | !data/coco_paper.names 30 | !data/coco.data 31 | !data/coco_*.data 32 | !data/coco_*.txt 33 | !data/trainvalno5k.shapes 34 | !data/*.sh 35 | 36 | pycocotools/* 37 | results*.txt 38 | gcp_test*.sh 39 | 40 | # MATLAB GitIgnore ----------------------------------------------------------------------------------------------------- 41 | *.m~ 42 | *.mat 43 | !targets*.mat 44 | 45 | # Neural Network weights ----------------------------------------------------------------------------------------------- 46 | *.weights 47 | *.pt 48 | *.onnx 49 | *.mlmodel 50 | darknet53.conv.74 51 | yolov3-tiny.conv.15 52 | 53 | # GitHub Python GitIgnore ---------------------------------------------------------------------------------------------- 54 | # Byte-compiled / optimized / DLL files 55 | __pycache__/ 56 | *.py[cod] 57 | *$py.class 58 | 59 | # C extensions 60 | *.so 61 | 62 | # Distribution / packaging 63 | .Python 64 | env/ 65 | build/ 66 | develop-eggs/ 67 | dist/ 68 | downloads/ 69 | eggs/ 70 | .eggs/ 71 | lib/ 72 | lib64/ 73 | parts/ 74 | sdist/ 75 | var/ 76 | wheels/ 77 | *.egg-info/ 78 | .installed.cfg 79 | *.egg 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .coverage 95 | .coverage.* 96 | .cache 97 | nosetests.xml 98 | coverage.xml 99 | *.cover 100 | .hypothesis/ 101 | 102 | # Translations 103 | *.mo 104 | *.pot 105 | 106 | # Django stuff: 107 | *.log 108 | local_settings.py 109 | 110 | # Flask stuff: 111 | instance/ 112 | .webassets-cache 113 | 114 | # Scrapy stuff: 115 | .scrapy 116 | 117 | # Sphinx documentation 118 | docs/_build/ 119 | 120 | # PyBuilder 121 | target/ 122 | 123 | # Jupyter Notebook 124 | .ipynb_checkpoints 125 | 126 | # pyenv 127 | .python-version 128 | 129 | # celery beat schedule file 130 | celerybeat-schedule 131 | 132 | # SageMath parsed files 133 | *.sage.py 134 | 135 | # dotenv 136 | .env 137 | 138 | # virtualenv 139 | .venv 140 | venv/ 141 | ENV/ 142 | 143 | # Spyder project settings 144 | .spyderproject 145 | .spyproject 146 | 147 | # Rope project settings 148 | .ropeproject 149 | 150 | # mkdocs documentation 151 | /site 152 | 153 | # mypy 154 | .mypy_cache/ 155 | 156 | 157 | # https://github.com/github/gitignore/blob/master/Global/macOS.gitignore ----------------------------------------------- 158 | 159 | # General 160 | .DS_Store 161 | .AppleDouble 162 | .LSOverride 163 | 164 | # Icon must end with two \r 165 | Icon 166 | Icon? 
167 | 168 | # Thumbnails 169 | ._* 170 | 171 | # Files that might appear in the root of a volume 172 | .DocumentRevisions-V100 173 | .fseventsd 174 | .Spotlight-V100 175 | .TemporaryItems 176 | .Trashes 177 | .VolumeIcon.icns 178 | .com.apple.timemachine.donotpresent 179 | 180 | # Directories potentially created on remote AFP share 181 | .AppleDB 182 | .AppleDesktop 183 | Network Trash Folder 184 | Temporary Items 185 | .apdisk 186 | 187 | 188 | # https://github.com/github/gitignore/blob/master/Global/JetBrains.gitignore 189 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm 190 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 191 | 192 | # User-specific stuff: 193 | .idea/* 194 | .idea/**/workspace.xml 195 | .idea/**/tasks.xml 196 | .idea/dictionaries 197 | .html # Bokeh Plots 198 | .pg # TensorFlow Frozen Graphs 199 | .avi # videos 200 | 201 | # Sensitive or high-churn files: 202 | .idea/**/dataSources/ 203 | .idea/**/dataSources.ids 204 | .idea/**/dataSources.local.xml 205 | .idea/**/sqlDataSources.xml 206 | .idea/**/dynamic.xml 207 | .idea/**/uiDesigner.xml 208 | 209 | # Gradle: 210 | .idea/**/gradle.xml 211 | .idea/**/libraries 212 | 213 | # CMake 214 | cmake-build-debug/ 215 | cmake-build-release/ 216 | 217 | # Mongo Explorer plugin: 218 | .idea/**/mongoSettings.xml 219 | 220 | ## File-based project format: 221 | *.iws 222 | 223 | ## Plugin-specific files: 224 | 225 | # IntelliJ 226 | out/ 227 | 228 | # mpeltonen/sbt-idea plugin 229 | .idea_modules/ 230 | 231 | # JIRA plugin 232 | atlassian-ide-plugin.xml 233 | 234 | # Cursive Clojure plugin 235 | .idea/replstate.xml 236 | 237 | # Crashlytics plugin (for Android Studio and IntelliJ) 238 | com_crashlytics_export_strings.xml 239 | crashlytics.properties 240 | crashlytics-build.properties 241 | fabric.properties 242 | -------------------------------------------------------------------------------- /README.assets/20200412221106751.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/20200412221106751.png -------------------------------------------------------------------------------- /README.assets/2020041418343015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/2020041418343015.png -------------------------------------------------------------------------------- /README.assets/20200415100437671.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/20200415100437671.png -------------------------------------------------------------------------------- /README.assets/DeepSort.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/README.assets/DeepSort.jpg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DEEP SORT YOLOV3 PYTORCH 2 | 3 | 
Published paper: [1] Zhang Hongming, Wang Run, Dong Peijie, Sun Hongguang, Li Shuqin, Wang Hongyan. Multi-object tracking of beef cattle based on the LSRCEM-YOLO algorithm [J/OL]. Transactions of the Chinese Society for Agricultural Machinery: 1-14 [2022-03-07]. https://kns-cnki-net-s.nudtproxy.yitlink.com:443/kcms/detail/11.1964.S.20210223.0955.004.html. 4 | 5 | 6 | ## New Features 7 | 8 | - Added the commonly used attention modules CBAM and SE to the object detection part 9 | 10 | - Added an OpenCV-based object tracking algorithm that uses YOLOv3 for detection on the first frame (in the miniversion folder). 11 | 12 | - Added the SORT algorithm 13 | 14 | - Improved the training of the ReID part 15 | 16 | ## Quick Start 17 | 18 | - [Building the DeepSort annotation format and a ReID dataset with DarkLabel](https://zhuanlan.zhihu.com/p/137430266) 19 | 20 | - [Deep SORT multi-object tracking code walkthrough (Part 1)](https://zhuanlan.zhihu.com/p/133678626) 21 | 22 | - [Deep SORT multi-object tracking code walkthrough (Part 2)](https://zhuanlan.zhihu.com/p/133689982) 23 | 24 | 25 | ## Project Structure 26 | 27 | cfg: storage location for the network structure (.cfg) files 28 | 29 | deep_sort 30 | 31 | - deep: ReID module, taken from https://github.com/pprp/reid_for_deepsort 32 | - sort: Deep SORT reuses several modules from SORT; this is the core part 33 | 34 | miniversion: tracking with the cv2 tracker modules plus YOLOv3; the results are relatively poor 35 | 36 | sort: dependency files required by the SORT algorithm 37 | 38 | utils: packages from YOLOv3 39 | 40 | weights: storage location for the YOLOv3 weights 41 | 42 | deep_sort.py: runs the whole object tracking process with Deep SORT and saves the tracking result (video file) 43 | 44 | detect.py: inherited from YOLOv3, used for object detection. 45 | 46 | pre_mot.py: runs tracking and saves the result files. 47 | 48 | eval_mot.py: evaluates the tracking result files and computes the metrics. 49 | 50 | models.py: inherited from YOLOv3, the model construction code. 51 | 52 | predict.py: inherited from YOLOv3, detects a single image. 53 | 54 | sort.py: entry point that invokes the SORT algorithm 55 | 56 | train.py: trains YOLOv3 57 | 58 | test.py: tests YOLOv3 59 | 60 | 61 | 62 | ## Code Annotations 63 | 64 | The complete walkthrough "Deep SORT Multi-Object Tracking Code Explained" was first published on the GiantPandaCV WeChat official account; you are welcome to follow it. 65 | 66 | Annotations are provided for most of the code in the deep_sort folder; the class diagram below was drawn from the code: 67 | 68 | ![DeepSort](README.assets/DeepSort.jpg) 69 | 70 | State transitions: 71 | 72 | ![State transition diagram](README.assets/20200415100437671.png) 73 | 74 | Overall framework: 75 | 76 | ![Image from Zhihu user Harlek](README.assets/20200412221106751.png) 77 | 78 | Flowchart: 79 | 80 | ![Deep SORT flowchart summarized by Zhihu user @猫弟](README.assets/2020041418343015.png) 81 | 82 | ## References 83 | 84 | Object detection: based on Ultralytics YOLOv3 (a fairly early version) https://github.com/ultralytics/yolov3 85 | 86 | ReID part: https://github.com/pprp/reid_for_deepsort 87 | 88 | Deep SORT reference: https://github.com/ZQPei/deep_sort_pytorch 89 | 90 | SORT reference: https://github.com/abewley/sort 91 | 92 | 93 | -------------------------------------------------------------------------------- /cfg/darknet19-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=448 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | # 1 17 | [convolutional] 18 | batch_normalize=1 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | # 3 30 | [convolutional] 31 | batch_normalize=1 32 | filters=64 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=leaky 37 | 38 | [maxpool] 39 | size=2 40 | stride=2 41 | 42 | # 5 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | # 7 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | # 9 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | # 11 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 | stride=1
95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | # 13 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | # 15 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | # 17 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | # 19 150 | [convolutional] 151 | batch_normalize=1 152 | filters=1024 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | batch_normalize=1 160 | filters=512 161 | size=1 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | # 21 167 | [convolutional] 168 | batch_normalize=1 169 | filters=1024 170 | size=3 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=512 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | #23 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | ######################## 193 | 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=512 198 | size=1 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | size=3 206 | stride=1 207 | pad=1 208 | filters=1024 209 | activation=leaky 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | filters=512 214 | size=1 215 | stride=1 216 | pad=1 217 | activation=leaky 218 | 219 | [convolutional] 220 | batch_normalize=1 221 | size=3 222 | stride=1 223 | pad=1 224 | filters=1024 225 | activation=leaky 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=1 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | batch_normalize=1 237 | size=3 238 | stride=1 239 | pad=1 240 | filters=1024 241 | activation=leaky 242 | 243 | [convolutional] 244 | size=1 245 | stride=1 246 | pad=1 247 | filters=255 248 | activation=linear 249 | 250 | 251 | [yolo] 252 | mask = 6,7,8 253 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 254 | classes=80 255 | num=9 256 | jitter=.3 257 | ignore_thresh = .7 258 | truth_thresh = 1 259 | random=1 260 | 261 | 262 | [route] 263 | layers = -4 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=256 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [upsample] 274 | stride=2 275 | 276 | [route] 277 | layers = -1, 17 278 | 279 | 280 | 281 | [convolutional] 282 | batch_normalize=1 283 | filters=256 284 | size=1 285 | stride=1 286 | pad=1 287 | activation=leaky 288 | 289 | [convolutional] 290 | batch_normalize=1 291 | size=3 292 | stride=1 293 | pad=1 294 | filters=512 295 | activation=leaky 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=256 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=512 311 | activation=leaky 312 | 313 | [convolutional] 314 | 
batch_normalize=1 315 | filters=256 316 | size=1 317 | stride=1 318 | pad=1 319 | activation=leaky 320 | 321 | [convolutional] 322 | batch_normalize=1 323 | size=3 324 | stride=1 325 | pad=1 326 | filters=512 327 | activation=leaky 328 | 329 | [convolutional] 330 | size=1 331 | stride=1 332 | pad=1 333 | filters=255 334 | activation=linear 335 | 336 | 337 | [yolo] 338 | mask = 3,4,5 339 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 340 | classes=80 341 | num=9 342 | jitter=.3 343 | ignore_thresh = .7 344 | truth_thresh = 1 345 | random=1 346 | 347 | 348 | 349 | [route] 350 | layers = -4 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=1 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [upsample] 361 | stride=2 362 | 363 | [route] 364 | layers = -1, 11 365 | 366 | 367 | 368 | [convolutional] 369 | batch_normalize=1 370 | filters=128 371 | size=1 372 | stride=1 373 | pad=1 374 | activation=leaky 375 | 376 | [convolutional] 377 | batch_normalize=1 378 | size=3 379 | stride=1 380 | pad=1 381 | filters=256 382 | activation=leaky 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=leaky 391 | 392 | [convolutional] 393 | batch_normalize=1 394 | size=3 395 | stride=1 396 | pad=1 397 | filters=256 398 | activation=leaky 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=128 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | size=3 411 | stride=1 412 | pad=1 413 | filters=256 414 | activation=leaky 415 | 416 | [convolutional] 417 | size=1 418 | stride=1 419 | pad=1 420 | filters=255 421 | activation=linear 422 | 423 | 424 | [yolo] 425 | mask = 0,1,2 426 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 427 | classes=80 428 | num=9 429 | jitter=.3 430 | ignore_thresh = .7 431 | truth_thresh = 1 432 | random=1 433 | -------------------------------------------------------------------------------- /cfg/darknet19-3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=128 3 | subdivisions=1 4 | height=224 5 | width=224 6 | channels=3 7 | momentum=0.9 8 | decay=0.0005 9 | max_crop=448 10 | 11 | learning_rate=0.1 12 | policy=poly 13 | power=4 14 | max_batches=1600000 15 | 16 | # 1 17 | [convolutional] 18 | batch_normalize=1 19 | filters=32 20 | size=3 21 | stride=1 22 | pad=1 23 | activation=leaky 24 | 25 | [maxpool] 26 | size=2 27 | stride=2 28 | 29 | # 3 30 | [convolutional] 31 | batch_normalize=1 32 | filters=64 33 | size=3 34 | stride=1 35 | pad=1 36 | activation=leaky 37 | 38 | [maxpool] 39 | size=2 40 | stride=2 41 | 42 | # 5 43 | [convolutional] 44 | batch_normalize=1 45 | filters=128 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=1 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | # 7 60 | [convolutional] 61 | batch_normalize=1 62 | filters=128 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [maxpool] 69 | size=2 70 | stride=2 71 | 72 | # 9 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [convolutional] 82 | batch_normalize=1 83 | filters=128 84 | size=1 85 | stride=1 86 | pad=1 87 | activation=leaky 88 | 89 | # 11 90 | [convolutional] 91 | batch_normalize=1 92 | filters=256 93 | size=3 94 
| stride=1 95 | pad=1 96 | activation=leaky 97 | 98 | [maxpool] 99 | size=2 100 | stride=2 101 | 102 | # 13 103 | [convolutional] 104 | batch_normalize=1 105 | filters=512 106 | size=3 107 | stride=1 108 | pad=1 109 | activation=leaky 110 | 111 | [convolutional] 112 | batch_normalize=1 113 | filters=256 114 | size=1 115 | stride=1 116 | pad=1 117 | activation=leaky 118 | 119 | # 15 120 | [convolutional] 121 | batch_normalize=1 122 | filters=512 123 | size=3 124 | stride=1 125 | pad=1 126 | activation=leaky 127 | 128 | [convolutional] 129 | batch_normalize=1 130 | filters=256 131 | size=1 132 | stride=1 133 | pad=1 134 | activation=leaky 135 | 136 | # 17 137 | [convolutional] 138 | batch_normalize=1 139 | filters=512 140 | size=3 141 | stride=1 142 | pad=1 143 | activation=leaky 144 | 145 | [maxpool] 146 | size=2 147 | stride=2 148 | 149 | # 19 150 | [convolutional] 151 | batch_normalize=1 152 | filters=1024 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [convolutional] 159 | batch_normalize=1 160 | filters=512 161 | size=1 162 | stride=1 163 | pad=1 164 | activation=leaky 165 | 166 | # 21 167 | [convolutional] 168 | batch_normalize=1 169 | filters=1024 170 | size=3 171 | stride=1 172 | pad=1 173 | activation=leaky 174 | 175 | [convolutional] 176 | batch_normalize=1 177 | filters=512 178 | size=1 179 | stride=1 180 | pad=1 181 | activation=leaky 182 | 183 | #23 184 | [convolutional] 185 | batch_normalize=1 186 | filters=1024 187 | size=3 188 | stride=1 189 | pad=1 190 | activation=leaky 191 | 192 | ######################## 193 | 194 | 195 | [convolutional] 196 | batch_normalize=1 197 | filters=512 198 | size=1 199 | stride=1 200 | pad=1 201 | activation=leaky 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | size=3 206 | stride=1 207 | pad=1 208 | filters=1024 209 | activation=leaky 210 | 211 | [convolutional] 212 | batch_normalize=1 213 | filters=512 214 | size=1 215 | stride=1 216 | pad=1 217 | activation=leaky 218 | 219 | [convolutional] 220 | batch_normalize=1 221 | size=3 222 | stride=1 223 | pad=1 224 | filters=1024 225 | activation=leaky 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=1 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | batch_normalize=1 237 | size=3 238 | stride=1 239 | pad=1 240 | filters=1024 241 | activation=leaky 242 | 243 | [convolutional] 244 | size=1 245 | stride=1 246 | pad=1 247 | filters=18 248 | activation=linear 249 | 250 | 251 | [yolo] 252 | mask = 6,7,8 253 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 254 | classes=1 255 | num=9 256 | jitter=.3 257 | ignore_thresh = .7 258 | truth_thresh = 1 259 | random=1 260 | 261 | 262 | [route] 263 | layers = -4 264 | 265 | [convolutional] 266 | batch_normalize=1 267 | filters=256 268 | size=1 269 | stride=1 270 | pad=1 271 | activation=leaky 272 | 273 | [upsample] 274 | stride=2 275 | 276 | [route] 277 | layers = -1, 16 278 | 279 | 280 | 281 | [convolutional] 282 | batch_normalize=1 283 | filters=256 284 | size=1 285 | stride=1 286 | pad=1 287 | activation=leaky 288 | 289 | [convolutional] 290 | batch_normalize=1 291 | size=3 292 | stride=1 293 | pad=1 294 | filters=512 295 | activation=leaky 296 | 297 | [convolutional] 298 | batch_normalize=1 299 | filters=256 300 | size=1 301 | stride=1 302 | pad=1 303 | activation=leaky 304 | 305 | [convolutional] 306 | batch_normalize=1 307 | size=3 308 | stride=1 309 | pad=1 310 | filters=512 311 | activation=leaky 312 | 313 | 
[convolutional] 314 | batch_normalize=1 315 | filters=256 316 | size=1 317 | stride=1 318 | pad=1 319 | activation=leaky 320 | 321 | [convolutional] 322 | batch_normalize=1 323 | size=3 324 | stride=1 325 | pad=1 326 | filters=512 327 | activation=leaky 328 | 329 | [convolutional] 330 | size=1 331 | stride=1 332 | pad=1 333 | filters=18 334 | activation=linear 335 | 336 | 337 | [yolo] 338 | mask = 3,4,5 339 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 340 | classes=1 341 | num=9 342 | jitter=.3 343 | ignore_thresh = .7 344 | truth_thresh = 1 345 | random=1 346 | 347 | 348 | 349 | [route] 350 | layers = -4 351 | 352 | [convolutional] 353 | batch_normalize=1 354 | filters=128 355 | size=1 356 | stride=1 357 | pad=1 358 | activation=leaky 359 | 360 | [upsample] 361 | stride=2 362 | 363 | [route] 364 | layers = -1, 10 365 | 366 | 367 | 368 | [convolutional] 369 | batch_normalize=1 370 | filters=128 371 | size=1 372 | stride=1 373 | pad=1 374 | activation=leaky 375 | 376 | [convolutional] 377 | batch_normalize=1 378 | size=3 379 | stride=1 380 | pad=1 381 | filters=256 382 | activation=leaky 383 | 384 | [convolutional] 385 | batch_normalize=1 386 | filters=128 387 | size=1 388 | stride=1 389 | pad=1 390 | activation=leaky 391 | 392 | [convolutional] 393 | batch_normalize=1 394 | size=3 395 | stride=1 396 | pad=1 397 | filters=256 398 | activation=leaky 399 | 400 | [convolutional] 401 | batch_normalize=1 402 | filters=128 403 | size=1 404 | stride=1 405 | pad=1 406 | activation=leaky 407 | 408 | [convolutional] 409 | batch_normalize=1 410 | size=3 411 | stride=1 412 | pad=1 413 | filters=256 414 | activation=leaky 415 | 416 | [convolutional] 417 | size=1 418 | stride=1 419 | pad=1 420 | filters=18 421 | activation=linear 422 | 423 | 424 | [yolo] 425 | mask = 0,1,2 426 | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 427 | classes=1 428 | num=9 429 | jitter=.3 430 | ignore_thresh = .7 431 | truth_thresh = 1 432 | random=1 433 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-1cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 
91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | [convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=18 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=1 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=1 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-3cls.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=24 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=3 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=24 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 0,1,2 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=3 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny-cbam.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | [cbam] 35 | size=7 36 | 37 | [maxpool] 38 | size=2 39 | stride=2 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=32 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [cbam] 50 | size=7 51 | 52 | [maxpool] 53 | size=2 54 | stride=2 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=64 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [cbam] 65 | size=7 66 | 67 | [maxpool] 68 | size=2 69 | stride=2 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=128 74 | size=3 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [cbam] 80 | size=7 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=256 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [cbam] 95 | size=7 96 | 97 | [maxpool] 98 | size=2 99 | stride=2 100 | 101 | [convolutional] 102 | batch_normalize=1 103 | filters=512 104 | size=3 105 | stride=1 106 | pad=1 107 | activation=leaky 108 | 109 | [cbam] 110 | size=7 111 | 112 | [maxpool] 113 | size=2 114 | stride=1 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=1024 119 | size=3 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [cbam] 125 | size=7 126 | 127 | ########### 128 | 129 | [convolutional] 130 | batch_normalize=1 131 | filters=256 132 | size=1 133 | stride=1 134 | pad=1 135 | activation=leaky 136 | 137 | 138 | [convolutional] 139 | batch_normalize=1 140 | filters=512 141 | size=3 142 | stride=1 143 | pad=1 144 | activation=leaky 145 | 146 | 147 | 148 | [convolutional] 149 | size=1 150 | stride=1 151 | pad=1 152 | filters=18 153 | activation=linear 154 | 155 | [yolo] 156 | mask = 6,7,8 157 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 158 | classes=1 159 | num=9 160 | 
jitter=.3 161 | ignore_thresh = .7 162 | truth_thresh = 1 163 | random=1 164 | 165 | [route] 166 | layers = -6 167 | 168 | [convolutional] 169 | batch_normalize=1 170 | filters=128 171 | size=1 172 | stride=1 173 | pad=1 174 | activation=leaky 175 | 176 | 177 | [upsample] 178 | stride=2 179 | 180 | [route] 181 | layers = -1, 8 182 | 183 | [convolutional] 184 | batch_normalize=1 185 | filters=256 186 | size=3 187 | stride=1 188 | pad=1 189 | activation=leaky 190 | 191 | 192 | [convolutional] 193 | size=1 194 | stride=1 195 | pad=1 196 | filters=18 197 | activation=linear 198 | 199 | [yolo] 200 | mask = 3,4,5 201 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 202 | classes=1 203 | num=9 204 | jitter=.3 205 | ignore_thresh = .7 206 | truth_thresh = 1 207 | random=1 208 | 209 | [route] 210 | layers = -5 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=128 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | 221 | [upsample] 222 | stride=2 223 | 224 | [route] 225 | layers = -1, 6 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=128 230 | size=3 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | [convolutional] 236 | size=1 237 | stride=1 238 | pad=1 239 | filters=18 240 | activation=linear 241 | 242 | [yolo] 243 | mask = 0,1,2 244 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 245 | classes=1 246 | num=9 247 | jitter=.3 248 | ignore_thresh = .7 249 | truth_thresh = 1 250 | random=1 -------------------------------------------------------------------------------- /cfg/yolov3-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | batch=1 4 | subdivisions=1 5 | # Training 6 | # batch=64 7 | # subdivisions=2 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=16 28 | size=3 29 | stride=1 30 | pad=1 31 | activation=leaky 32 | 33 | [maxpool] 34 | size=2 35 | stride=2 36 | 37 | [convolutional] 38 | batch_normalize=1 39 | filters=32 40 | size=3 41 | stride=1 42 | pad=1 43 | activation=leaky 44 | 45 | [maxpool] 46 | size=2 47 | stride=2 48 | 49 | [convolutional] 50 | batch_normalize=1 51 | filters=64 52 | size=3 53 | stride=1 54 | pad=1 55 | activation=leaky 56 | 57 | [maxpool] 58 | size=2 59 | stride=2 60 | 61 | [convolutional] 62 | batch_normalize=1 63 | filters=128 64 | size=3 65 | stride=1 66 | pad=1 67 | activation=leaky 68 | 69 | [maxpool] 70 | size=2 71 | stride=2 72 | 73 | [convolutional] 74 | batch_normalize=1 75 | filters=256 76 | size=3 77 | stride=1 78 | pad=1 79 | activation=leaky 80 | 81 | [maxpool] 82 | size=2 83 | stride=2 84 | 85 | [convolutional] 86 | batch_normalize=1 87 | filters=512 88 | size=3 89 | stride=1 90 | pad=1 91 | activation=leaky 92 | 93 | [maxpool] 94 | size=2 95 | stride=1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=1024 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | ########### 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=256 110 | size=1 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [convolutional] 116 | batch_normalize=1 117 | filters=512 118 | size=3 119 | stride=1 120 | pad=1 121 | activation=leaky 122 | 123 | 
[convolutional] 124 | size=1 125 | stride=1 126 | pad=1 127 | filters=255 128 | activation=linear 129 | 130 | 131 | 132 | [yolo] 133 | mask = 3,4,5 134 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 135 | classes=80 136 | num=6 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=255 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 1,2,3 176 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 177 | classes=80 178 | num=6 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | -------------------------------------------------------------------------------- /cfg/yolov3-tiny_3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | # batch=1 4 | # subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=16 8 | width=608 9 | height=608 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.001 19 | burn_in=1000 20 | max_batches = 200000 21 | policy=steps 22 | steps=180000,190000 23 | scales=.1,.1 24 | 25 | 26 | [convolutional] 27 | batch_normalize=1 28 | filters=16 29 | size=3 30 | stride=1 31 | pad=1 32 | activation=leaky 33 | 34 | 35 | [maxpool] 36 | size=2 37 | stride=2 38 | 39 | [convolutional] 40 | batch_normalize=1 41 | filters=32 42 | size=3 43 | stride=1 44 | pad=1 45 | activation=leaky 46 | 47 | [maxpool] 48 | size=2 49 | stride=2 50 | 51 | [convolutional] 52 | batch_normalize=1 53 | filters=64 54 | size=3 55 | stride=1 56 | pad=1 57 | activation=leaky 58 | 59 | [maxpool] 60 | size=2 61 | stride=2 62 | 63 | [convolutional] 64 | batch_normalize=1 65 | filters=128 66 | size=3 67 | stride=1 68 | pad=1 69 | activation=leaky 70 | 71 | [maxpool] 72 | size=2 73 | stride=2 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=256 78 | size=3 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [maxpool] 84 | size=2 85 | stride=2 86 | 87 | [convolutional] 88 | batch_normalize=1 89 | filters=512 90 | size=3 91 | stride=1 92 | pad=1 93 | activation=leaky 94 | 95 | [maxpool] 96 | size=2 97 | stride=1 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=1024 102 | size=3 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | ########### 108 | 109 | [convolutional] 110 | batch_normalize=1 111 | filters=256 112 | size=1 113 | stride=1 114 | pad=1 115 | activation=leaky 116 | 117 | [convolutional] 118 | batch_normalize=1 119 | filters=512 120 | size=3 121 | stride=1 122 | pad=1 123 | activation=leaky 124 | 125 | [convolutional] 126 | size=1 127 | stride=1 128 | pad=1 129 | filters=18 130 | activation=linear 131 | 132 | [yolo] 133 | mask = 6,7,8 134 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 135 | classes=1 136 | num=9 137 | jitter=.3 138 | ignore_thresh = .7 139 | truth_thresh = 1 140 | random=1 141 | 142 | [route] 143 | layers = -4 144 | 145 | [convolutional] 146 | batch_normalize=1 147 | filters=128 148 | size=1 149 | stride=1 150 | pad=1 151 | 
activation=leaky 152 | 153 | [upsample] 154 | stride=2 155 | 156 | [route] 157 | layers = -1, 8 158 | 159 | [convolutional] 160 | batch_normalize=1 161 | filters=256 162 | size=3 163 | stride=1 164 | pad=1 165 | activation=leaky 166 | 167 | [convolutional] 168 | size=1 169 | stride=1 170 | pad=1 171 | filters=18 172 | activation=linear 173 | 174 | [yolo] 175 | mask = 3,4,5 176 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 177 | classes=1 178 | num=9 179 | jitter=.3 180 | ignore_thresh = .7 181 | truth_thresh = 1 182 | random=1 183 | 184 | 185 | 186 | [route] 187 | layers = -3 188 | 189 | [convolutional] 190 | batch_normalize=1 191 | filters=128 192 | size=1 193 | stride=1 194 | pad=1 195 | activation=leaky 196 | 197 | [upsample] 198 | stride=2 199 | 200 | [route] 201 | layers = -1, 6 202 | 203 | [convolutional] 204 | batch_normalize=1 205 | filters=128 206 | size=3 207 | stride=1 208 | pad=1 209 | activation=leaky 210 | 211 | [convolutional] 212 | size=1 213 | stride=1 214 | pad=1 215 | filters=18 216 | activation=linear 217 | 218 | [yolo] 219 | mask = 0,1,2 220 | anchors = 4,7, 7,15, 13,25, 25,42, 41,67, 75,94, 91,162, 158,205, 250,332 221 | classes=1 222 | num=9 223 | jitter=.3 224 | ignore_thresh = .7 225 | truth_thresh = 1 226 | random=1 -------------------------------------------------------------------------------- /data/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf yolov3 weights coco 5 | git clone https://github.com/ultralytics/yolov3 6 | bash yolov3/weights/download_yolov3_weights.sh && cp -r weights yolov3 7 | bash yolov3/data/get_coco_dataset.sh 8 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo reboot now 10 | 11 | # Re-clone 12 | rm -rf yolov3 13 | git clone https://github.com/ultralytics/yolov3 # master 14 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 15 | cp -r weights yolov3 16 | cp -r cocoapi/PythonAPI/pycocotools yolov3 17 | cd yolov3 18 | 19 | # Train 20 | python3 train.py 21 | 22 | # Resume 23 | python3 train.py --resume 24 | 25 | # Detect 26 | python3 detect.py 27 | 28 | # Test 29 | python3 test.py --save-json 30 | 31 | # Git pull 32 | git pull https://github.com/ultralytics/yolov3 # master 33 | git pull https://github.com/ultralytics/yolov3 test # branch 34 | 35 | # Test Darknet training 36 | python3 test.py --weights ../darknet/backup/yolov3.backup 37 | 38 | # Copy latest.pt TO bucket 39 | gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics 40 | 41 | # Copy latest.pt FROM bucket 42 | gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt 43 | wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt 44 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 45 | 46 | # Reproduce tutorials 47 | rm results*.txt # WARNING: removes existing results 48 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt 49 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt 50 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt 51 | python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 52 | python3 -c "from utils import utils; utils.plot_results()" 53 | gsutil cp results*.txt gs://ultralytics 54 | gsutil cp results.png gs://ultralytics 55 | sudo shutdown 56 | 57 | # Unit tests 58 | rm -rf yolov3 59 | git clone https://github.com/ultralytics/yolov3 # master 60 | cp -r weights yolov3 && cd yolov3 61 | python3 detect.py # detect 62 | python3 test.py --data data/coco_32img.data # test 63 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 64 | 65 | # Debug/Development 66 | rm -rf yolov3 67 | git clone https://github.com/ultralytics/yolov3 # master 68 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 69 | cp -r cocoapi/PythonAPI/pycocotools yolov3 70 | cp -r weights yolov3 && cd yolov3 71 | python3 train.py --evolve --data data/coco_100img.data --num-workers 2 --epochs 30 72 | gsutil cp evolve.txt gs://ultralytics 73 | sudo shutdown 74 | -------------------------------------------------------------------------------- /data/get_coco2014.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | 10 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 11 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 12 | rm ./cookie 13 | 14 | # Unzip labels 15 | unzip -q ${filename} # for coco.zip 16 | # tar -xzf ${filename} # for coco.tar.gz 17 | rm ${filename} 18 | 19 | # Download images 20 | cd coco/images 21 | curl http://images.cocodataset.org/zips/train2014.zip -o train2014.zip 22 
| curl http://images.cocodataset.org/zips/val2014.zip -o val2014.zip 23 | 24 | # Unzip images 25 | unzip -q train2014.zip 26 | unzip -q val2014.zip 27 | 28 | # (optional) Delete zip files 29 | rm -rf *.zip 30 | 31 | # cd out 32 | cd ../.. 33 | 34 | -------------------------------------------------------------------------------- /data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download images 19 | cd coco/images 20 | curl http://images.cocodataset.org/zips/train2017.zip -o train2017.zip 21 | curl http://images.cocodataset.org/zips/val2017.zip -o val2017.zip 22 | 23 | # Unzip images 24 | unzip -q train2017.zip 25 | unzip -q val2017.zip 26 | 27 | # (optional) Delete zip files 28 | rm -rf *.zip 29 | 30 | # cd out 31 | cd ../.. 32 | 33 | -------------------------------------------------------------------------------- /data/get_coco_dataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # CREDIT: https://github.com/pjreddie/darknet/tree/master/scripts/get_coco_dataset.sh 3 | 4 | # Clone COCO API 5 | git clone https://github.com/pdollar/coco && cd coco 6 | 7 | # Download Images 8 | mkdir images && cd images 9 | wget -c https://pjreddie.com/media/files/train2014.zip 10 | wget -c https://pjreddie.com/media/files/val2014.zip 11 | 12 | # Unzip 13 | unzip -q train2014.zip 14 | unzip -q val2014.zip 15 | 16 | # (optional) Delete zip files 17 | rm -rf *.zip 18 | 19 | cd .. 20 | 21 | # Download COCO Metadata 22 | wget -c https://pjreddie.com/media/files/instances_train-val2014.zip 23 | wget -c https://pjreddie.com/media/files/coco/5k.part 24 | wget -c https://pjreddie.com/media/files/coco/trainvalno5k.part 25 | wget -c https://pjreddie.com/media/files/coco/labels.tgz 26 | tar xzf labels.tgz 27 | unzip -q instances_train-val2014.zip 28 | 29 | # Set Up Image Lists 30 | paste <(awk "{print \"$PWD\"}" <5k.part) 5k.part | tr -d '\t' > 5k.txt 31 | paste <(awk "{print \"$PWD\"}" <trainvalno5k.part) trainvalno5k.part | tr -d '\t' > trainvalno5k.txt 32 | 33 | # get xview training data 34 | # wget -O train_images.tgz 'https://d307kc0mrhucc3.cloudfront.net/train_images.tgz?Expires=1530124049&Signature=JrQoxipmsETvb7eQHCfDFUO-QEHJGAayUv0i-ParmS-1hn7hl9D~bzGuHWG82imEbZSLUARTtm0wOJ7EmYMGmG5PtLKz9H5qi6DjoSUuFc13NQ-~6yUhE~NfPaTnehUdUMCa3On2wl1h1ZtRG~0Jq1P-AJbpe~oQxbyBrs1KccaMa7FK4F4oMM6sMnNgoXx8-3O77kYw~uOpTMFmTaQdHln6EztW0Lx17i57kK3ogbSUpXgaUTqjHCRA1dWIl7PY1ngQnLslkLhZqmKcaL-BvWf0ZGjHxCDQBpnUjIlvMu5NasegkwD9Jjc0ClgTxsttSkmbapVqaVC8peR0pO619Q__&Key-Pair-Id=APKAIKGDJB5C3XUL2DXQ' 35 | # tar -xvzf train_images.tgz 36 | # sudo rm -rf train_images/._* 37 | # lastly convert each .tif to a .bmp for faster loading in cv2 38 | 39 | # /home/glenn_jocher3/coco/images/train2014/COCO_train2014_000000167126.jpg # bad image??
40 | -------------------------------------------------------------------------------- /deep_sort.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import numpy as np 7 | 8 | from collections import deque 9 | from predict import InferYOLOv3 10 | from utils.utils import xyxy2xywh 11 | from deep_sort import DeepSort 12 | from utils.utils_sort import COLORS_10, draw_bboxes 13 | 14 | ''' 15 | mot results: 16 | ------------ 17 | frame, id (starting from 1), tlwh(%.2f),1,-1,-1,-1 18 | 3,1,97.00,545.00,79.00,239.00,1,-1,-1,-1 19 | 3,2,376.24,396.64,83.44,252.43,1,-1,-1,-1 20 | 3,3,546.66,146.51,59.63,180.89,1,-1,-1,-1 21 | 3,4,1630.61,251.64,68.72,208.46,1,-1,-1,-1 22 | 3,5,1043.80,134.38,59.63,180.89,1,-1,-1,-1 23 | 3,6,792.96,148.08,55.57,168.71,1,-1,-1,-1 24 | 3,7,1732.55,448.65,73.69,223.20,1,-1,-1,-1 25 | ''' 26 | 27 | 28 | def xyxy2tlwh(x): 29 | ''' 30 | Convert xyxy boxes to (top left x, top left y, width, height) 31 | ''' 32 | y = torch.zeros_like(x) if isinstance(x, 33 | torch.Tensor) else np.zeros_like(x) 34 | y[:, 0] = x[:, 0] 35 | y[:, 1] = x[:, 1] 36 | y[:, 2] = x[:, 2] - x[:, 0] 37 | y[:, 3] = x[:, 3] - x[:, 1] 38 | return y 39 | 40 | 41 | class Detector(object): 42 | def __init__(self, args): 43 | self.args = args 44 | if args.display: 45 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 46 | cv2.resizeWindow("test", args.display_width, args.display_height) 47 | 48 | device = torch.device( 49 | 'cuda') if torch.cuda.is_available() else torch.device('cpu') 50 | 51 | self.vdo = cv2.VideoCapture() 52 | self.yolo3 = InferYOLOv3(args.yolo_cfg, 53 | args.img_size, 54 | args.yolo_weights, 55 | args.data_cfg, 56 | device, 57 | conf_thres=args.conf_thresh, 58 | nms_thres=args.nms_thresh) 59 | self.deepsort = DeepSort(args.deepsort_checkpoint) 60 | 61 | def __enter__(self): 62 | assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error" 63 | self.vdo.open(self.args.VIDEO_PATH) 64 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | 67 | if self.args.save_path: 68 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 69 | self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, 70 | (self.im_width, self.im_height)) 71 | 72 | assert self.vdo.isOpened() 73 | return self 74 | 75 | def __exit__(self, exc_type, exc_value, exc_traceback): 76 | if exc_type: 77 | print(exc_type, exc_value, exc_traceback) 78 | 79 | def detect(self, outfile=None): 80 | frame_cnt = -1 81 | 82 | if outfile is not None: 83 | f = open(outfile, 'w') 84 | 85 | print("begin....") 86 | 87 | while self.vdo.grab(): 88 | frame_cnt += 1 89 | 90 | if frame_cnt % 3 == 0:  # skip every third frame to reduce the processing load 91 | continue 92 | 93 | start = time.time() 94 | _, ori_im = self.vdo.retrieve() 95 | im = ori_im 96 | 97 | t1_begin = time.time() 98 | bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im) 99 | t1_end = time.time() 100 | 101 | t2_begin = time.time() 102 | if bbox_xxyy is not None: 103 | # select class 104 | # mask = cls_ids == 0 105 | # bbox_xxyy = bbox_xxyy[mask] 106 | 107 | # bbox_xxyy[:, 3:] *= 1.2 108 | # cls_conf = cls_conf[mask] 109 | 110 | bbox_xcycwh = xyxy2xywh(bbox_xxyy) 111 | outputs = self.deepsort.update(bbox_xcycwh, cls_conf, im) 112 | 113 | if len(outputs) > 0: 114 | bbox_xyxy = outputs[:, :4] 115 | identities = outputs[:, -1] 116 | # draw the tracked boxes with their identities 117 | ori_im = draw_bboxes(ori_im, bbox_xyxy, identities) 118 | 119 | # frame, id, tlwh(%.2f),1,-1,-1,-1 120 | if outfile is not None: 121 | box_xywh =
xyxy2tlwh(bbox_xyxy) 122 | for i in range(len(box_xywh)): 123 | write_line = "%d,%d,%d,%d,%d,%d,1,-1,-1,-1\n" % ( 124 | frame_cnt + 125 | 1, outputs[i, -1], int(box_xywh[i] 126 | [0]), int(box_xywh[i][1]), 127 | int(box_xywh[i][2]), int(box_xywh[i][3])) 128 | f.write(write_line) 129 | 130 | t2_end = time.time() 131 | 132 | end = time.time() 133 | print( 134 | "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" 135 | % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin), 136 | (end - start), ((t1_end - t1_begin) * 100 / 137 | ((end - start))), (1 / (end - start)))) 138 | if self.args.display: 139 | cv2.imshow("test", ori_im) 140 | cv2.waitKey(1) 141 | 142 | if self.args.save_path: 143 | self.output.write(ori_im) 144 | 145 | if outfile is not None: 146 | f.close() 147 | 148 | 149 | def parse_args(): 150 | parser = argparse.ArgumentParser() 151 | parser.add_argument("VIDEO_PATH", type=str) 152 | parser.add_argument("--yolo_cfg", 153 | type=str, 154 | default="../YOLOv3-complete-pruning-master/cfg/dense-v3-tiny-spp.cfg" 155 | ) 156 | parser.add_argument( 157 | "--yolo_weights", 158 | type=str, 159 | default="../YOLOv3-complete-pruning-master/weights/A6/last.pt" 160 | ) 161 | parser.add_argument("--conf_thresh", type=float, default=0.5) # ori 0.5 162 | parser.add_argument("--nms_thresh", type=float, default=0.3) 163 | parser.add_argument("--deepsort_checkpoint", 164 | type=str, 165 | default="deep_sort/deep/checkpoint/mobilenetv2_x1_0_best.pt") 166 | parser.add_argument("--max_dist", type=float, default=0.2) 167 | parser.add_argument("--ignore_display", 168 | dest="display", 169 | action="store_false") 170 | parser.add_argument("--display_width", type=int, default=800) 171 | parser.add_argument("--display_height", type=int, default=600) 172 | parser.add_argument("--save_path", type=str, default="demo.avi") 173 | parser.add_argument("--data_cfg", type=str, default="data/voc_small.data") 174 | parser.add_argument("--img_size", type=int, default=416, help="img size") 175 | 176 | return parser.parse_args() 177 | 178 | 179 | if __name__ == "__main__": 180 | args = parse_args() 181 | output_file = "./data/videosample/predicts.txt" 182 | with Detector(args) as det: 183 | det.detect(output_file) 184 | 185 | os.system("ffmpeg -y -i demo.avi -r 10 -b:a 32k %s_output.mp4" % 186 | (os.path.basename(args.VIDEO_PATH).split('.')[0])) 187 | -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | from .deep_sort import DeepSort -------------------------------------------------------------------------------- /deep_sort/deep/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | *.jpg 3 | checkpoint/ckpt.t7 4 | *.json 5 | data/videoAndLabel/cutout8.mp4 6 | -------------------------------------------------------------------------------- /deep_sort/deep/README.md: -------------------------------------------------------------------------------- 1 | # reid_for_deepsort 2 | simplest reid for https://github.com/pprp/yolov3.pytorch 3 | -------------------------------------------------------------------------------- /deep_sort/deep/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/__init__.py -------------------------------------------------------------------------------- 
/deep_sort/deep/checkpoint/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/checkpoint/.gitkeep -------------------------------------------------------------------------------- /deep_sort/deep/checkpoint/ckpt.t7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/deep/checkpoint/ckpt.t7 -------------------------------------------------------------------------------- /deep_sort/deep/eval.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from torch.autograd import Variable 9 | from torch.utils.data import DataLoader, Dataset 10 | from torchvision import datasets, transforms 11 | 12 | from train import input_size 13 | from models import build_model 14 | 15 | test_transforms = transforms.Compose([ 16 | transforms.Resize(input_size), 17 | transforms.ToTensor(), 18 | transforms.Normalize([0.3568, 0.3141, 0.2781], [0.1752, 0.1857, 0.1879]) 19 | ]) 20 | 21 | gallery_datasets = datasets.ImageFolder(os.path.join("data", "gallery"), 22 | transform=test_transforms) 23 | query_datasets = datasets.ImageFolder(os.path.join("data", "query"), 24 | transform=test_transforms) 25 | 26 | gallery_dataloader = DataLoader(gallery_datasets, 27 | batch_size=128, 28 | drop_last=False, 29 | shuffle=False, 30 | num_workers=1) 31 | 32 | query_dataloader = DataLoader(query_datasets, 33 | batch_size=128, 34 | drop_last=False, 35 | shuffle=False, 36 | num_workers=1) 37 | 38 | use_gpu = torch.cuda.is_available() 39 | 40 | class_names = gallery_datasets.classes 41 | 42 | 43 | def fliplr(img): 44 | '''flip horizontal''' 45 | inv_idx = torch.arange(img.size(3) - 1, -1, -1).long() 46 | img_flip = img.index_select(3, inv_idx) # flip along w 47 | return img_flip 48 | 49 | 50 | def extract_features(model, dataloader): 51 | features = torch.FloatTensor() 52 | count = 0 53 | for data in dataloader: 54 | img, label = data 55 | bs, c, h, w = img.size() 56 | count += bs 57 | ff = torch.FloatTensor(bs, 96).zero_() # 2048 if res50 58 | print(count, end='\r') 59 | sys.stdout.flush() 60 | # add two features 61 | for i in range(2): 62 | if i == 1: 63 | img = fliplr(img) 64 | input_img = Variable(img.cuda()) 65 | # print("=", input_img.shape) 66 | feature = model(input_img) 67 | feature = feature.data.cpu() 68 | # print(ff.shape, feature.shape) 69 | ff = ff + feature 70 | # norm features 71 | fnorm = torch.norm(ff, p=2, dim=1, keepdim=True) 72 | ff = ff.div(fnorm.expand_as(ff)) 73 | 74 | features = torch.cat((features, ff), 0) 75 | return features 76 | 77 | 78 | def get_label(img_path): 79 | labels = [] 80 | for path, _ in img_path: 81 | filename = os.path.basename(path) 82 | label = filename.split('_')[0] 83 | if label[0:2] == '-1': 84 | labels.append(-1) 85 | else: 86 | labels.append(label) 87 | return labels 88 | 89 | 90 | def compute_mAP(index, good_index, junk_index): 91 | ap = 0 92 | cmc = torch.IntTensor(len(index)).zero_() #len = 20 得到前20个 93 | if good_index.size == 0: 94 | cmc[0] = -1 95 | return ap, cmc 96 | 97 | # remove junk index 98 | mask = np.in1d(index, junk_index, invert=True) 99 | index = index[mask] 100 | 101 | # find good index 102 | ngood = len(good_index) 103 | mask = 
np.in1d(index, good_index) 104 | rows_good = np.argwhere(mask == True) 105 | rows_good = rows_good.flatten() 106 | 107 | cmc[rows_good[0]:] = 1 108 | for i in range(ngood): 109 | d_recall = 1.0 / ngood 110 | precision = (i + 1) * 1.0 / (rows_good[i] + 1) 111 | if rows_good[i] != 0: 112 | old_precision = i * 1.0 / rows_good[i] 113 | else: 114 | old_precision = 1.0 115 | ap = ap + d_recall * (old_precision + precision) / 2 116 | return ap, cmc 117 | 118 | 119 | def evaluate(qf, ql, gf, gl): 120 | query = qf.view(-1, 1) # query 是一张图 121 | score = torch.mm(gf, query) # 计算得分[1, num] 122 | score = score.squeeze(1).cpu() 123 | score = score.numpy() 124 | #predict index 125 | index = np.argsort(score) 126 | index = index[::-1] # index 倒过来 127 | # 得到前20个 128 | # index = index[0:20] 129 | 130 | # good index , label一致 131 | good_index = np.argwhere(gl == ql) 132 | # print("good_index", gl, '\n', ql, gl == ql, type(gl)) 133 | junk_index = np.argwhere(gl == "bg") 134 | 135 | CMC = compute_mAP(index, good_index, junk_index) 136 | return CMC 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = argparse.ArgumentParser('help') 141 | parser.add_argument('--weight_path', 142 | type=str, 143 | default="./checkpints/last.pt") 144 | parser.add_argument("--model", type=str, default="mudeep") 145 | args = parser.parse_args() 146 | 147 | model = build_model(name=args.model, num_classes=len(class_names)) 148 | assert os.path.isfile( 149 | "./checkpoint/%s/%s_last.pt" % 150 | (args.model, args.model)), "Error: no checkpoint file found!" 151 | print('Loading from checkpoint/last.pt') 152 | checkpoint = torch.load("./checkpoint/%s/%s_last.pt" % 153 | (args.model, args.model)) 154 | net_dict = checkpoint['net_dict'] 155 | model.load_state_dict(net_dict) 156 | 157 | model.eval() 158 | if use_gpu: 159 | model = model.cuda() 160 | 161 | gallery_features = extract_features(model, gallery_dataloader) 162 | query_features = extract_features(model, query_dataloader) 163 | 164 | gallery_label = np.array(get_label(gallery_datasets.imgs)) 165 | query_label = np.array(get_label(query_datasets.imgs)) 166 | 167 | if use_gpu: 168 | gallery_features = gallery_features.cuda() 169 | query_features = query_features.cuda() 170 | 171 | CMC = torch.IntTensor(len(gallery_label)).zero_() 172 | ap = 0.0 173 | for i in range(len(query_label)): 174 | ap_tmp, CMC_tmp = evaluate(query_features[i], query_label[i], 175 | gallery_features, gallery_label) 176 | if CMC_tmp[0] == -1: 177 | continue 178 | CMC = CMC + CMC_tmp 179 | # print(i, ":",ap_tmp) 180 | ap += ap_tmp 181 | 182 | CMC = CMC.float() 183 | CMC = CMC / len(query_label) 184 | 185 | print("\tRank@1:%f\n\tRank@5:%f\n\tRank@10:%f\n\tmAP:%f" % 186 | (CMC[0], CMC[4], CMC[9], ap / len(query_label))) 187 | -------------------------------------------------------------------------------- /deep_sort/deep/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .models import build_model 7 | # from .train import input_size 8 | 9 | 10 | class Extractor(object): 11 | def __init__(self, model_name, model_path, use_cuda=True): 12 | self.net = build_model(name=model_name, 13 | num_classes=96) #osnet_small(96, reid=True) 14 | self.device = "cuda" if torch.cuda.is_available( 15 | ) and use_cuda else "cpu" 16 | state_dict = torch.load(model_path)['net_dict'] 17 | self.net.load_state_dict(state_dict) 18 | print("Loading weights from {}... 
Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (128,128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.3568, 0.3141, 0.2781], 24 | [0.1752, 0.1857, 0.1879]) 25 | ]) 26 | 27 | def _preprocess(self, im_crops): 28 | """ 29 | TODO: 30 | 1. to float with scale from 0 to 1 31 | 2. resize to (64, 128) as Market1501 dataset did 32 | 3. concatenate to a numpy array 33 | 3. to torch Tensor 34 | 4. normalize 35 | """ 36 | def _resize(im, size): 37 | return cv2.resize(im.astype(np.float32) / 255., size) 38 | 39 | im_batch = torch.cat([ 40 | self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops 41 | ], 42 | dim=0).float() 43 | return im_batch 44 | 45 | def __call__(self, im_crops): 46 | im_batch = self._preprocess(im_crops) 47 | with torch.no_grad(): 48 | im_batch = im_batch.to(self.device) 49 | features = self.net(im_batch) 50 | return features.cpu().numpy() 51 | 52 | 53 | if __name__ == '__main__': 54 | img = cv2.imread("data/reid/cutout13_0/cutout13_0_0.jpg")[:, :, (2, 1, 0)] 55 | extr = Extractor("mudeep","checkpoint/best.pt") 56 | feature = extr([img, img]) 57 | -------------------------------------------------------------------------------- /deep_sort/deep/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class BasicBlock(nn.Module): 7 | def __init__(self, c_in, c_out, is_downsample=False): 8 | super(BasicBlock, self).__init__() 9 | self.is_downsample = is_downsample 10 | if is_downsample: 11 | self.conv1 = nn.Conv2d(c_in, 12 | c_out, 13 | 3, 14 | stride=2, 15 | padding=1, 16 | bias=False) 17 | else: 18 | self.conv1 = nn.Conv2d(c_in, 19 | c_out, 20 | 3, 21 | stride=1, 22 | padding=1, 23 | bias=False) 24 | self.bn1 = nn.BatchNorm2d(c_out) 25 | self.relu = nn.ReLU(True) 26 | self.conv2 = nn.Conv2d(c_out, 27 | c_out, 28 | 3, 29 | stride=1, 30 | padding=1, 31 | bias=False) 32 | self.bn2 = nn.BatchNorm2d(c_out) 33 | if is_downsample: 34 | self.downsample = nn.Sequential( 35 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 36 | nn.BatchNorm2d(c_out)) 37 | elif c_in != c_out: 38 | self.downsample = nn.Sequential( 39 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 40 | nn.BatchNorm2d(c_out)) 41 | self.is_downsample = True 42 | 43 | def forward(self, x): 44 | y = self.conv1(x) 45 | y = self.bn1(y) 46 | y = self.relu(y) 47 | y = self.conv2(y) 48 | y = self.bn2(y) 49 | if self.is_downsample: 50 | x = self.downsample(x) 51 | return F.relu(x.add(y), True) 52 | 53 | 54 | def make_layers(c_in, c_out, repeat_times, is_downsample=False): 55 | blocks = [] 56 | for i in range(repeat_times): 57 | if i == 0: 58 | blocks += [ 59 | BasicBlock(c_in, c_out, is_downsample=is_downsample), 60 | ] 61 | else: 62 | blocks += [ 63 | BasicBlock(c_out, c_out), 64 | ] 65 | return nn.Sequential(*blocks) 66 | 67 | 68 | class Net(nn.Module): 69 | def __init__(self, num_classes=751, reid=False): 70 | super(Net, self).__init__() 71 | # 3 128 64 72 | self.conv = nn.Sequential( 73 | nn.Conv2d(3, 64, 3, stride=1, padding=1), 74 | nn.BatchNorm2d(64), 75 | nn.ReLU(inplace=True), 76 | # nn.Conv2d(32,32,3,stride=1,padding=1), 77 | # nn.BatchNorm2d(32), 78 | # nn.ReLU(inplace=True), 79 | nn.MaxPool2d(3, 2, padding=1), 80 | ) 81 | # 32 64 32 82 | self.layer1 = make_layers(64, 64, 2, False) 83 | # 32 64 32 84 | self.layer2 = make_layers(64, 128, 2, True) 85 | # 64 32 16 86 | self.layer3 = make_layers(128, 256, 2, True) 87 | 
# 128 16 8 88 | self.layer4 = make_layers(256, 512, 2, True) 89 | # 256 8 4 90 | self.avgpool = nn.AvgPool2d((8, 4), 1) 91 | # 256 1 1 92 | self.reid = reid 93 | self.classifier = nn.Sequential( 94 | nn.Linear(512, 256), 95 | nn.BatchNorm1d(256), 96 | nn.ReLU(inplace=True), 97 | nn.Dropout(), 98 | nn.Linear(256, num_classes), 99 | ) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) 103 | x = self.layer1(x) 104 | x = self.layer2(x) 105 | x = self.layer3(x) 106 | x = self.layer4(x) 107 | x = self.avgpool(x) 108 | x = x.view(x.size(0), -1) 109 | # B x 128 110 | if self.reid: 111 | x = x.div(x.norm(p=2, dim=1, keepdim=True)) 112 | return x 113 | # classifier 114 | x = self.classifier(x) 115 | return x 116 | 117 | 118 | if __name__ == '__main__': 119 | net = Net() 120 | x = torch.randn(4, 3, 128, 64) 121 | y = net(x) 122 | import ipdb 123 | ipdb.set_trace() 124 | -------------------------------------------------------------------------------- /deep_sort/deep/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | 4 | from .pcb import * 5 | from .mlfn import * 6 | from .hacnn import * 7 | from .osnet import * 8 | from .senet import * 9 | from .mudeep import * 10 | from .nasnet import * 11 | from .resnet import * 12 | from .densenet import * 13 | from .xception import * 14 | from .osnet_ain import * 15 | from .resnetmid import * 16 | from .shufflenet import * 17 | from .squeezenet import * 18 | from .inceptionv4 import * 19 | from .mobilenetv2 import * 20 | from .resnet_ibn_a import * 21 | from .resnet_ibn_b import * 22 | from .shufflenetv2 import * 23 | from .inceptionresnetv2 import * 24 | 25 | __model_factory = { 26 | # image classification models 27 | 'resnet18': resnet18, 28 | 'resnet34': resnet34, 29 | 'resnet50': resnet50, 30 | 'resnet101': resnet101, 31 | 'resnet152': resnet152, 32 | 'resnext50_32x4d': resnext50_32x4d, 33 | 'resnext101_32x8d': resnext101_32x8d, 34 | 'resnet50_fc512': resnet50_fc512, 35 | 'se_resnet50': se_resnet50, 36 | 'se_resnet50_fc512': se_resnet50_fc512, 37 | 'se_resnet101': se_resnet101, 38 | 'se_resnext50_32x4d': se_resnext50_32x4d, 39 | 'se_resnext101_32x4d': se_resnext101_32x4d, 40 | 'densenet121': densenet121, 41 | 'densenet169': densenet169, 42 | 'densenet201': densenet201, 43 | 'densenet161': densenet161, 44 | 'densenet121_fc512': densenet121_fc512, 45 | 'inceptionresnetv2': inceptionresnetv2, 46 | 'inceptionv4': inceptionv4, 47 | 'xception': xception, 48 | 'resnet50_ibn_a': resnet50_ibn_a, 49 | 'resnet50_ibn_b': resnet50_ibn_b, 50 | # lightweight models 51 | 'nasnsetmobile': nasnetamobile, 52 | 'mobilenetv2_x1_0': mobilenetv2_x1_0, 53 | 'mobilenetv2_x1_4': mobilenetv2_x1_4, 54 | 'shufflenet': shufflenet, 55 | 'squeezenet1_0': squeezenet1_0, 56 | 'squeezenet1_0_fc512': squeezenet1_0_fc512, 57 | 'squeezenet1_1': squeezenet1_1, 58 | 'shufflenet_v2_x0_5': shufflenet_v2_x0_5, 59 | 'shufflenet_v2_x1_0': shufflenet_v2_x1_0, 60 | 'shufflenet_v2_x1_5': shufflenet_v2_x1_5, 61 | 'shufflenet_v2_x2_0': shufflenet_v2_x2_0, 62 | # reid-specific models 63 | 'mudeep': MuDeep, 64 | 'resnet50mid': resnet50mid, 65 | 'hacnn': HACNN, 66 | 'pcb_p6': pcb_p6, 67 | 'pcb_p4': pcb_p4, 68 | 'mlfn': mlfn, 69 | 'osnet_x1_0': osnet_x1_0, 70 | 'osnet_x0_75': osnet_x0_75, 71 | 'osnet_x0_5': osnet_x0_5, 72 | 'osnet_x0_25': osnet_x0_25, 73 | 'osnet_ibn_x1_0': osnet_ibn_x1_0, 74 | 'osnet_ain_x1_0': osnet_ain_x1_0 75 | } 76 | 77 | 78 | def show_avai_models(): 79 | """Displays 
available models. 80 | 81 | Examples:: 82 | >>> from torchreid import models 83 | >>> models.show_avai_models() 84 | """ 85 | print(list(__model_factory.keys())) 86 | 87 | 88 | def build_model(name, 89 | num_classes, 90 | loss='softmax', 91 | pretrained=True, 92 | use_gpu=True): 93 | """A function wrapper for building a model. 94 | 95 | Args: 96 | name (str): model name. 97 | num_classes (int): number of training identities. 98 | loss (str, optional): loss function to optimize the model. Currently 99 | supports "softmax" and "triplet". Default is "softmax". 100 | pretrained (bool, optional): whether to load ImageNet-pretrained weights. 101 | Default is True. 102 | use_gpu (bool, optional): whether to use gpu. Default is True. 103 | 104 | Returns: 105 | nn.Module 106 | 107 | Examples:: 108 | >>> from torchreid import models 109 | >>> model = models.build_model('resnet50', 751, loss='softmax') 110 | """ 111 | avai_models = list(__model_factory.keys()) 112 | if name not in avai_models: 113 | raise KeyError('Unknown model: {}. Must be one of {}'.format( 114 | name, avai_models)) 115 | return __model_factory[name](num_classes=num_classes, 116 | loss=loss, 117 | pretrained=pretrained, 118 | use_gpu=use_gpu) 119 | -------------------------------------------------------------------------------- /deep_sort/deep/models/mudeep.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | __all__ = ['MuDeep'] 7 | 8 | 9 | class ConvBlock(nn.Module): 10 | """Basic convolutional block. 11 | 12 | convolution + batch normalization + relu. 13 | 14 | Args: 15 | in_c (int): number of input channels. 16 | out_c (int): number of output channels. 17 | k (int or tuple): kernel size. 18 | s (int or tuple): stride. 19 | p (int or tuple): padding. 
20 | """ 21 | 22 | def __init__(self, in_c, out_c, k, s, p): 23 | super(ConvBlock, self).__init__() 24 | self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p) 25 | self.bn = nn.BatchNorm2d(out_c) 26 | 27 | def forward(self, x): 28 | return F.relu(self.bn(self.conv(x))) 29 | 30 | 31 | class ConvLayers(nn.Module): 32 | """Preprocessing layers.""" 33 | 34 | def __init__(self): 35 | super(ConvLayers, self).__init__() 36 | self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1) 37 | self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1) 38 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 39 | 40 | def forward(self, x): 41 | x = self.conv1(x) 42 | x = self.conv2(x) 43 | x = self.maxpool(x) 44 | return x 45 | 46 | 47 | class MultiScaleA(nn.Module): 48 | """Multi-scale stream layer A (Sec.3.1)""" 49 | 50 | def __init__(self): 51 | super(MultiScaleA, self).__init__() 52 | self.stream1 = nn.Sequential( 53 | ConvBlock(96, 96, k=1, s=1, p=0), 54 | ConvBlock(96, 24, k=3, s=1, p=1), 55 | ) 56 | self.stream2 = nn.Sequential( 57 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 58 | ConvBlock(96, 24, k=1, s=1, p=0), 59 | ) 60 | self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0) 61 | self.stream4 = nn.Sequential( 62 | ConvBlock(96, 16, k=1, s=1, p=0), 63 | ConvBlock(16, 24, k=3, s=1, p=1), 64 | ConvBlock(24, 24, k=3, s=1, p=1), 65 | ) 66 | 67 | def forward(self, x): 68 | s1 = self.stream1(x) 69 | s2 = self.stream2(x) 70 | s3 = self.stream3(x) 71 | s4 = self.stream4(x) 72 | y = torch.cat([s1, s2, s3, s4], dim=1) 73 | return y 74 | 75 | 76 | class Reduction(nn.Module): 77 | """Reduction layer (Sec.3.1)""" 78 | 79 | def __init__(self): 80 | super(Reduction, self).__init__() 81 | self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 82 | self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1) 83 | self.stream3 = nn.Sequential( 84 | ConvBlock(96, 48, k=1, s=1, p=0), 85 | ConvBlock(48, 56, k=3, s=1, p=1), 86 | ConvBlock(56, 64, k=3, s=2, p=1), 87 | ) 88 | 89 | def forward(self, x): 90 | s1 = self.stream1(x) 91 | s2 = self.stream2(x) 92 | s3 = self.stream3(x) 93 | y = torch.cat([s1, s2, s3], dim=1) 94 | return y 95 | 96 | 97 | class MultiScaleB(nn.Module): 98 | """Multi-scale stream layer B (Sec.3.1)""" 99 | 100 | def __init__(self): 101 | super(MultiScaleB, self).__init__() 102 | self.stream1 = nn.Sequential( 103 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 104 | ConvBlock(256, 256, k=1, s=1, p=0), 105 | ) 106 | self.stream2 = nn.Sequential( 107 | ConvBlock(256, 64, k=1, s=1, p=0), 108 | ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)), 109 | ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), 110 | ) 111 | self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0) 112 | self.stream4 = nn.Sequential( 113 | ConvBlock(256, 64, k=1, s=1, p=0), 114 | ConvBlock(64, 64, k=(1, 3), s=1, p=(0, 1)), 115 | ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)), 116 | ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)), 117 | ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)), 118 | ) 119 | 120 | def forward(self, x): 121 | s1 = self.stream1(x) 122 | s2 = self.stream2(x) 123 | s3 = self.stream3(x) 124 | s4 = self.stream4(x) 125 | return s1, s2, s3, s4 126 | 127 | 128 | class Fusion(nn.Module): 129 | """Saliency-based learning fusion layer (Sec.3.2)""" 130 | 131 | def __init__(self): 132 | super(Fusion, self).__init__() 133 | self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1)) 134 | self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1)) 135 | self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1)) 136 | self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1)) 137 | 138 | # We add 
an average pooling layer to reduce the spatial dimension 139 | # of feature maps, which differs from the original paper. 140 | self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0) 141 | 142 | def forward(self, x1, x2, x3, x4): 143 | s1 = self.a1.expand_as(x1) * x1 144 | s2 = self.a2.expand_as(x2) * x2 145 | s3 = self.a3.expand_as(x3) * x3 146 | s4 = self.a4.expand_as(x4) * x4 147 | y = self.avgpool(s1 + s2 + s3 + s4) 148 | return y 149 | 150 | 151 | class MuDeep(nn.Module): 152 | """Multiscale deep neural network. 153 | 154 | Reference: 155 | Qian et al. Multi-scale Deep Learning Architectures 156 | for Person Re-identification. ICCV 2017. 157 | 158 | Public keys: 159 | - ``mudeep``: Multiscale deep neural network. 160 | """ 161 | 162 | def __init__(self, num_classes, loss='softmax', **kwargs): 163 | super(MuDeep, self).__init__() 164 | self.loss = loss 165 | 166 | self.block1 = ConvLayers() 167 | self.block2 = MultiScaleA() 168 | self.block3 = Reduction() 169 | self.block4 = MultiScaleB() 170 | self.block5 = Fusion() 171 | 172 | # Due to this fully connected layer, input image has to be fixed 173 | # in shape, i.e. (3, 256, 128), such that the last convolutional feature 174 | # maps are of shape (256, 16, 8). If input shape is changed, 175 | # the input dimension of this layer has to be changed accordingly. 176 | self.fc = nn.Sequential( 177 | nn.Linear(256 * 8 * 8, 4096), 178 | nn.BatchNorm1d(4096), 179 | nn.ReLU(), 180 | ) 181 | self.classifier = nn.Linear(4096, num_classes) 182 | self.feat_dim = 4096 183 | 184 | def featuremaps(self, x): 185 | x = self.block1(x) 186 | x = self.block2(x) 187 | x = self.block3(x) 188 | x = self.block4(x) 189 | x = self.block5(*x) 190 | return x 191 | 192 | def forward(self, x, return_featuremaps=False): 193 | x = self.featuremaps(x) 194 | if return_featuremaps: 195 | return x 196 | x = x.view(x.size(0), -1) 197 | x = self.fc(x) 198 | y = self.classifier(x) 199 | 200 | if self.loss == 'softmax': 201 | return y 202 | elif self.loss == 'triplet': 203 | return y, x 204 | else: 205 | raise KeyError('Unsupported loss: {}'.format(self.loss)) 206 | -------------------------------------------------------------------------------- /deep_sort/deep/models/original_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class BasicBlock(nn.Module): 6 | def __init__(self, c_in, c_out,is_downsample=False): 7 | super(BasicBlock,self).__init__() 8 | self.is_downsample = is_downsample 9 | if is_downsample: 10 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=2, padding=1, bias=False) 11 | else: 12 | self.conv1 = nn.Conv2d(c_in, c_out, 3, stride=1, padding=1, bias=False) 13 | self.bn1 = nn.BatchNorm2d(c_out) 14 | self.relu = nn.ReLU(True) 15 | self.conv2 = nn.Conv2d(c_out,c_out,3,stride=1,padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(c_out) 17 | if is_downsample: 18 | self.downsample = nn.Sequential( 19 | nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), 20 | nn.BatchNorm2d(c_out) 21 | ) 22 | elif c_in != c_out: 23 | self.downsample = nn.Sequential( 24 | nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), 25 | nn.BatchNorm2d(c_out) 26 | ) 27 | self.is_downsample = True 28 | 29 | def forward(self,x): 30 | y = self.conv1(x) 31 | y = self.bn1(y) 32 | y = self.relu(y) 33 | y = self.conv2(y) 34 | y = self.bn2(y) 35 | if self.is_downsample: 36 | x = self.downsample(x) 37 | return F.relu(x.add(y),True) 38 | 39 | def 
make_layers(c_in,c_out,repeat_times, is_downsample=False): 40 | blocks = [] 41 | for i in range(repeat_times): 42 | if i ==0: 43 | blocks += [BasicBlock(c_in,c_out, is_downsample=is_downsample),] 44 | else: 45 | blocks += [BasicBlock(c_out,c_out),] 46 | return nn.Sequential(*blocks) 47 | 48 | class Net(nn.Module): 49 | def __init__(self, num_classes=625 ,reid=False): 50 | super(Net,self).__init__() 51 | # 3 128 64 52 | self.conv = nn.Sequential( 53 | nn.Conv2d(3,32,3,stride=1,padding=1), 54 | nn.BatchNorm2d(32), 55 | nn.ELU(inplace=True), 56 | nn.Conv2d(32,32,3,stride=1,padding=1), 57 | nn.BatchNorm2d(32), 58 | nn.ELU(inplace=True), 59 | nn.MaxPool2d(3,2,padding=1), 60 | ) 61 | # 32 64 32 62 | self.layer1 = make_layers(32,32,2,False) 63 | # 32 64 32 64 | self.layer2 = make_layers(32,64,2,True) 65 | # 64 32 16 66 | self.layer3 = make_layers(64,128,2,True) 67 | 68 | self.gap = nn.AdaptiveAvgPool2d(1) 69 | 70 | # 128 16 8 71 | self.dense = nn.Sequential( 72 | nn.Dropout(p=0.5), 73 | nn.Linear(128, 128), 74 | nn.BatchNorm1d(128), 75 | nn.ELU(inplace=True) 76 | ) 77 | # 256 1 1 78 | self.reid = reid 79 | self.batch_norm = nn.BatchNorm1d(128) 80 | self.classifier = nn.Sequential( 81 | nn.Linear(128, num_classes), 82 | ) 83 | 84 | def forward(self, x): 85 | bs = x.shape[0] 86 | x = self.conv(x) 87 | x = self.layer1(x) 88 | x = self.layer2(x) 89 | x = self.layer3(x) 90 | x = self.gap(x).view(bs, -1) 91 | 92 | if self.reid: 93 | x = self.dense[0](x) 94 | x = self.dense[1](x) 95 | x = x.div(x.norm(p=2,dim=1,keepdim=True)) 96 | return x 97 | 98 | x = self.dense(x) 99 | # B x 128 100 | # classifier 101 | x = self.classifier(x) 102 | return x 103 | 104 | 105 | if __name__ == '__main__': 106 | net = Net(reid=True) 107 | x = torch.randn(4,3,128,64) 108 | y = net(x) 109 | import ipdb; ipdb.set_trace() 110 | 111 | 112 | -------------------------------------------------------------------------------- /deep_sort/deep/models/shufflenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | import torch 3 | import torch.utils.model_zoo as model_zoo 4 | from torch import nn 5 | from torch.nn import functional as F 6 | 7 | __all__ = ['shufflenet'] 8 | 9 | model_urls = { 10 | # training epoch = 90, top1 = 61.8 11 | 'imagenet': 12 | 'https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM', 13 | } 14 | 15 | 16 | class ChannelShuffle(nn.Module): 17 | 18 | def __init__(self, num_groups): 19 | super(ChannelShuffle, self).__init__() 20 | self.g = num_groups 21 | 22 | def forward(self, x): 23 | b, c, h, w = x.size() 24 | n = c // self.g 25 | # reshape 26 | x = x.view(b, self.g, n, h, w) 27 | # transpose 28 | x = x.permute(0, 2, 1, 3, 4).contiguous() 29 | # flatten 30 | x = x.view(b, c, h, w) 31 | return x 32 | 33 | 34 | class Bottleneck(nn.Module): 35 | 36 | def __init__( 37 | self, 38 | in_channels, 39 | out_channels, 40 | stride, 41 | num_groups, 42 | group_conv1x1=True 43 | ): 44 | super(Bottleneck, self).__init__() 45 | assert stride in [1, 2], 'Warning: stride must be either 1 or 2' 46 | self.stride = stride 47 | mid_channels = out_channels // 4 48 | if stride == 2: out_channels -= in_channels 49 | # group conv is not applied to first conv1x1 at stage 2 50 | num_groups_conv1x1 = num_groups if group_conv1x1 else 1 51 | self.conv1 = nn.Conv2d( 52 | in_channels, 53 | mid_channels, 54 | 1, 55 | groups=num_groups_conv1x1, 56 | bias=False 57 | ) 58 | self.bn1 = nn.BatchNorm2d(mid_channels) 59 | self.shuffle1 = 
ChannelShuffle(num_groups) 60 | self.conv2 = nn.Conv2d( 61 | mid_channels, 62 | mid_channels, 63 | 3, 64 | stride=stride, 65 | padding=1, 66 | groups=mid_channels, 67 | bias=False 68 | ) 69 | self.bn2 = nn.BatchNorm2d(mid_channels) 70 | self.conv3 = nn.Conv2d( 71 | mid_channels, out_channels, 1, groups=num_groups, bias=False 72 | ) 73 | self.bn3 = nn.BatchNorm2d(out_channels) 74 | if stride == 2: self.shortcut = nn.AvgPool2d(3, stride=2, padding=1) 75 | 76 | def forward(self, x): 77 | out = F.relu(self.bn1(self.conv1(x))) 78 | out = self.shuffle1(out) 79 | out = self.bn2(self.conv2(out)) 80 | out = self.bn3(self.conv3(out)) 81 | if self.stride == 2: 82 | res = self.shortcut(x) 83 | out = F.relu(torch.cat([res, out], 1)) 84 | else: 85 | out = F.relu(x + out) 86 | return out 87 | 88 | 89 | # configuration of (num_groups: #out_channels) based on Table 1 in the paper 90 | cfg = { 91 | 1: [144, 288, 576], 92 | 2: [200, 400, 800], 93 | 3: [240, 480, 960], 94 | 4: [272, 544, 1088], 95 | 8: [384, 768, 1536], 96 | } 97 | 98 | 99 | class ShuffleNet(nn.Module): 100 | """ShuffleNet. 101 | 102 | Reference: 103 | Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural 104 | Network for Mobile Devices. CVPR 2018. 105 | 106 | Public keys: 107 | - ``shufflenet``: ShuffleNet (groups=3). 108 | """ 109 | 110 | def __init__(self, num_classes, loss='softmax', num_groups=3, **kwargs): 111 | super(ShuffleNet, self).__init__() 112 | self.loss = loss 113 | 114 | self.conv1 = nn.Sequential( 115 | nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False), 116 | nn.BatchNorm2d(24), 117 | nn.ReLU(), 118 | nn.MaxPool2d(3, stride=2, padding=1), 119 | ) 120 | 121 | self.stage2 = nn.Sequential( 122 | Bottleneck( 123 | 24, cfg[num_groups][0], 2, num_groups, group_conv1x1=False 124 | ), 125 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 126 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 127 | Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups), 128 | ) 129 | 130 | self.stage3 = nn.Sequential( 131 | Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups), 132 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 133 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 134 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 135 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 136 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 137 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 138 | Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups), 139 | ) 140 | 141 | self.stage4 = nn.Sequential( 142 | Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups), 143 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 144 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 145 | Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups), 146 | ) 147 | 148 | self.classifier = nn.Linear(cfg[num_groups][2], num_classes) 149 | self.feat_dim = cfg[num_groups][2] 150 | 151 | def forward(self, x): 152 | x = self.conv1(x) 153 | x = self.stage2(x) 154 | x = self.stage3(x) 155 | x = self.stage4(x) 156 | x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1) 157 | 158 | if not self.training: 159 | return x 160 | 161 | y = self.classifier(x) 162 | 163 | if self.loss == 'softmax': 164 | return y 165 | elif self.loss == 'triplet': 166 | return y, x 167 | else: 168 | raise KeyError('Unsupported loss: 
{}'.format(self.loss)) 169 | 170 | 171 | def init_pretrained_weights(model, model_url): 172 | """Initializes model with pretrained weights. 173 | 174 | Layers that don't match with pretrained layers in name or size are kept unchanged. 175 | """ 176 | pretrain_dict = model_zoo.load_url(model_url) 177 | model_dict = model.state_dict() 178 | pretrain_dict = { 179 | k: v 180 | for k, v in pretrain_dict.items() 181 | if k in model_dict and model_dict[k].size() == v.size() 182 | } 183 | model_dict.update(pretrain_dict) 184 | model.load_state_dict(model_dict) 185 | 186 | 187 | def shufflenet(num_classes, loss='softmax', pretrained=True, **kwargs): 188 | model = ShuffleNet(num_classes, loss, **kwargs) 189 | if pretrained: 190 | #init_pretrained_weights(model, model_urls['imagenet']) 191 | import warnings 192 | warnings.warn( 193 | 'The imagenet pretrained weights need to be manually downloaded from {}' 194 | .format(model_urls['imagenet']) 195 | ) 196 | return model 197 | -------------------------------------------------------------------------------- /deep_sort/deep/oldfeature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import cv2 5 | 6 | from .model import Net 7 | 8 | 9 | class Extractor(object): 10 | def __init__(self, model_path, use_cuda=True): 11 | self.net = Net(reid=True) 12 | self.device = "cuda" if torch.cuda.is_available( 13 | ) and use_cuda else "cpu" 14 | state_dict = torch.load( 15 | model_path, map_location=lambda storage, loc: storage)['net_dict'] 16 | 17 | self.net.load_state_dict(state_dict) 18 | print("Loading weights from {}... Done!".format(model_path)) 19 | self.net.to(self.device) 20 | self.size = (64, 128) 21 | self.norm = transforms.Compose([ 22 | transforms.ToTensor(), 23 | transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 24 | ]) 25 | 26 | def _preprocess(self, im_crops): 27 | """ 28 | TODO: 29 | 1. to float with scale from 0 to 1 30 | 2. resize to (64, 128) as Market1501 dataset did 31 | 3. concatenate to a numpy array 32 | 3. to torch Tensor 33 | 4. normalize 34 | """ 35 | def _resize(im, size): 36 | return cv2.resize(im.astype(np.float32) / 255., size) 37 | 38 | im_batch = torch.cat([ 39 | self.norm(_resize(im, self.size)).unsqueeze(0) for im in im_crops 40 | ], 41 | dim=0).float() 42 | return im_batch 43 | 44 | def __call__(self, im_crops): 45 | im_batch = self._preprocess(im_crops) 46 | with torch.no_grad(): 47 | im_batch = im_batch.to(self.device) 48 | features = self.net(im_batch) 49 | return features.cpu().numpy() 50 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/assign_train_val.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | from os.path import join 5 | import random 6 | 7 | root_dir = "./data/reid" 8 | train_dir = "./data/train2" 9 | val_dir = "./data/val2" 10 | 11 | train_percent = 0.6 12 | val_percent = 0.4 13 | 14 | 15 | def mkdir_if_not_exist(dir): 16 | if not os.path.exists(dir): 17 | os.makedirs(dir) 18 | else: 19 | print("%s exists." 
% dir) 20 | 21 | 22 | class_full_path = glob.glob(join(root_dir, "*")) 23 | 24 | for i in range(len(class_full_path)): 25 | class_name = os.path.basename(class_full_path[i]) 26 | 27 | train_new_dir = join(train_dir, class_name) 28 | val_new_dir = join(val_dir, class_name) 29 | 30 | mkdir_if_not_exist(train_new_dir) 31 | mkdir_if_not_exist(val_new_dir) 32 | 33 | all_class_files = glob.glob(join(class_full_path[i], "*.jpg")) 34 | 35 | train_class_files = random.sample( 36 | all_class_files, int(len(all_class_files) * train_percent)) 37 | 38 | for file_path in all_class_files: 39 | print("processing %s." % (file_path)) 40 | if file_path in train_class_files: 41 | # assign to train folder 42 | shutil.copy(file_path, join(train_new_dir, os.path.basename(file_path))) 43 | else: 44 | # assign to val folder 45 | shutil.copy(file_path, join(val_new_dir, os.path.basename(file_path))) 46 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/center_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | nn.AdaptiveAvgPool2d 4 | class CenterLoss(nn.Module): 5 | """Center loss. 6 | 7 | Reference: 8 | Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 9 | 10 | Args: 11 | num_classes (int): number of classes. 12 | feat_dim (int): feature dimension. 13 | """ 14 | def __init__(self, num_classes=10, feat_dim=2, use_gpu=True): 15 | super(CenterLoss, self).__init__() 16 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 17 | self.num_classes = num_classes 18 | self.feat_dim = feat_dim 19 | self.use_gpu = use_gpu 20 | 21 | if self.use_gpu: 22 | self.centers = nn.Parameter( 23 | torch.randn(self.num_classes, self.feat_dim).to(self.device)) 24 | else: 25 | self.centers = nn.Parameter( 26 | torch.randn(self.num_classes, self.feat_dim)) 27 | 28 | def forward(self, x, labels): 29 | """ 30 | Args: 31 | x: feature matrix with shape (batch_size, feat_dim). 32 | labels: ground truth labels with shape (batch_size). 33 | """ 34 | batch_size = x.size(0) 35 | distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \ 36 | torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand( 37 | self.num_classes, batch_size).t() 38 | distmat.addmm_(1, -2, x, self.centers.t()) 39 | 40 | classes = torch.arange(self.num_classes).long() 41 | if self.use_gpu: 42 | classes = classes.to(self.device) 43 | labels = labels.unsqueeze(1).expand(batch_size, self.num_classes) 44 | mask = labels.eq(classes.expand(batch_size, self.num_classes)) 45 | 46 | dist = distmat * mask.float() 47 | loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size 48 | 49 | return loss -------------------------------------------------------------------------------- /deep_sort/deep/utils/compute_mean_std.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compute channel-wise mean and standard deviation of a dataset. 3 | 4 | Usage: 5 | $ python compute_mean_std.py DATASET_ROOT DATASET_KEY 6 | 7 | - The first argument points to the root path where you put the datasets. 8 | - The second argument means the specific dataset key. 
9 | 10 | For instance, your datasets are put under $DATA and you wanna 11 | compute the statistics of Market1501, do 12 | $ python compute_mean_std.py $DATA market1501 13 | """ 14 | import argparse 15 | import torch 16 | import torchvision 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--root', type=str) 22 | parser.add_argument('--sources', type=str) 23 | args = parser.parse_args() 24 | 25 | train_loader = torch.utils.data.DataLoader( 26 | torchvision.datasets.ImageFolder("data/train", 27 | transform=torchvision.transforms.ToTensor()), 28 | batch_size=6) 29 | 30 | print('Computing mean and std ...') 31 | mean = 0. 32 | std = 0. 33 | n_samples = 0. 34 | for data, label in train_loader: 35 | batch_size = data.size(0) 36 | data = data.view(batch_size, data.size(1), -1) 37 | mean += data.mean(2).sum(0) 38 | std += data.std(2).sum(0) 39 | n_samples += batch_size 40 | 41 | mean /= n_samples 42 | std /= n_samples 43 | print('Mean: {}'.format(mean)) 44 | print('Std: {}'.format(std)) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /deep_sort/deep/utils/rename_all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import os.path as osp 4 | 5 | root_dir = r'C:\Users\pprp\Desktop\face\head_test' 6 | 7 | for i in os.listdir(root_dir): 8 | new_dir = osp.join(root_dir, i) 9 | for j in os.listdir(new_dir): 10 | jpg = osp.join(new_dir, j) 11 | name, frame, head = j.split("_") 12 | frame_no = int(frame) 13 | extend_no = '%04d' % frame_no 14 | newName = 'head_' + name + "_" + '%s.jpg' % (str(extend_no)) 15 | print('from %s to %s' % (j, newName)) 16 | os.rename(jpg, os.path.join(new_dir, newName)) -------------------------------------------------------------------------------- /deep_sort/deep/utils/tsne_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from matplotlib import offsetbox 7 | from sklearn import (manifold, datasets, decomposition, ensemble, 8 | discriminant_analysis, random_projection, neighbors) 9 | 10 | 11 | class tSNE_Visual(): 12 | def __init__(self): 13 | super(tSNE_Visual, self).__init__() 14 | self.parser = argparse.ArgumentParser() 15 | self.parser.add_argument('--Input', 16 | type=str, 17 | default='data/reid', 18 | help='the path of target dataset') 19 | self.parser.add_argument('--Size', 20 | type=int, 21 | default=100, 22 | help='the size of every class') 23 | self.parser.add_argument('--Zoom', 24 | type=float, 25 | default=0.1, 26 | help='the size of every class') 27 | self.parser.add_argument('--Output', 28 | type=str, 29 | default='t-SNE1.png', 30 | help='the out path of result image') 31 | 32 | def parse(self): 33 | self.opt = self.parser.parse_args() 34 | args = vars(self.opt) 35 | print('\n--- load options ---') 36 | for name, value in sorted(args.items()): 37 | print('%s: %s' % (str(name), str(value))) 38 | return self.opt 39 | 40 | def plot_embedding(self, X, _output, zoom, title=None): 41 | x_min, x_max = np.min(X, 0), np.max(X, 0) 42 | X = (X - x_min) / (x_max - x_min) 43 | 44 | plt.figure(figsize=(20, 20)) 45 | ax = plt.subplot(111) 46 | 47 | if hasattr(offsetbox, 'AnnotationBbox'): 48 | # only print thumbnails with matplotlib > 1.0 49 | shown_images = np.array([[1., 1.]]) # just something big 50 | for i in 
range(X.shape[0]): 51 | dist = np.sum((X[i] - shown_images)**2, 1) 52 | #if np.min(dist) < 4e-3: 53 | # don't show points that are too close 54 | # continue 55 | shown_images = np.r_[shown_images, [X[i]]] 56 | imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage( 57 | real_imgs[i], zoom=0.12, cmap=plt.cm.gray_r), 58 | X[i], 59 | pad=0) 60 | ax.add_artist(imagebox) 61 | '''for i in range(X.shape[0]): 62 | #cls = plt.text(X[i, 0], X[i, 1], _classes[y[i][0].astype(int)-1], 63 | cls = plt.text(X[i, 0], X[i, 1], str(y[i].astype(int)), 64 | #cls = plt.text(X[i, 0], X[i, 1], '★', 65 | color=_colors[int(y[i][0]-1)], 66 | fontdict={'weight': 'bold', 'size': 12}) 67 | cls.set_zorder(20) ''' 68 | 69 | ax.spines['top'].set_visible(False) 70 | ax.spines['right'].set_visible(False) 71 | ax.spines['bottom'].set_visible(False) 72 | ax.spines['left'].set_visible(False) 73 | plt.xticks([]), plt.yticks([]) 74 | if title is not None: 75 | plt.title(title) 76 | plt.savefig(_output) 77 | 78 | 79 | if __name__ == '__main__': 80 | # Disable the GUI matplotlib 81 | plt.switch_backend('agg') 82 | 83 | tsne_visual = tSNE_Visual() 84 | opts = tsne_visual.parse() 85 | dataroot = opts.Input 86 | _size = opts.Size 87 | _output = opts.Output 88 | _zoom = opts.Zoom 89 | 90 | dirs = [] 91 | for item in os.listdir(dataroot): 92 | if ('.ipynb_checkpoints' not in item): 93 | dirs.append(item) 94 | 95 | _len = len(dirs) 96 | y = np.zeros((_size * _len, 1)) 97 | for i in range(_len): 98 | y[i * _size:(i + 1) * _size] = i + 1 99 | 100 | imgs = [] 101 | real_imgs = [] 102 | for i in range(_len): 103 | single_cls = [] 104 | path = os.path.join(dataroot, dirs[i]) 105 | dataset_list = os.listdir(path) 106 | cnt = 0 107 | for item in dataset_list: 108 | if (cnt == _size): 109 | break 110 | if ('.ipynb_checkpoints' in item): 111 | continue 112 | data_path = os.path.join(path, item) 113 | temp = cv2.imread(data_path) 114 | real_img = cv2.cvtColor(temp, cv2.COLOR_BGR2RGB) 115 | imgs.append(temp.reshape(-1)) 116 | real_imgs.append(real_img) 117 | cnt = cnt + 1 118 | np_imgs = np.array(imgs) 119 | real_imgs = np.array(real_imgs) 120 | 121 | tsne = manifold.TSNE(n_components=2, init='random', random_state=0) 122 | print(np_imgs.shape) 123 | result = tsne.fit_transform(np_imgs) 124 | 125 | tsne_visual.plot_embedding(X=result, _output=_output, zoom=_zoom) -------------------------------------------------------------------------------- /deep_sort/deep_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .deep.feature_extractor import Extractor 4 | from .sort.nn_matching import NearestNeighborDistanceMetric 5 | from .sort.preprocessing import non_max_suppression 6 | from .sort.detection import Detection 7 | from .sort.tracker import Tracker 8 | 9 | __all__ = ['DeepSort'] 10 | 11 | 12 | class DeepSort(object): 13 | def __init__(self, model_path, max_dist=0.2): 14 | self.min_confidence = 0.3 15 | # yolov3中检测结果置信度阈值,筛选置信度小于0.3的detection。 16 | 17 | self.nms_max_overlap = 1.0 18 | # 非极大抑制阈值,设置为1代表不进行抑制 19 | 20 | # 用于提取图片的embedding,返回的是一个batch图片对应的特征 21 | self.extractor = Extractor("mobilenetv2_x1_0", 22 | model_path, 23 | use_cuda=True) 24 | 25 | max_cosine_distance = max_dist 26 | # 用在级联匹配的地方,如果大于改阈值,就直接忽略 27 | nn_budget = 100 28 | # 预算,每个类别最多的样本个数,如果超过,删除旧的 29 | 30 | # 第一个参数可选'cosine' or 'euclidean' 31 | metric = NearestNeighborDistanceMetric("cosine", 32 | max_cosine_distance, 33 | nn_budget) 34 | self.tracker = Tracker(metric) 35 | 36 | def update(self, bbox_xywh, 
confidences, ori_img): 37 | self.height, self.width = ori_img.shape[:2] 38 | # generate detections 39 | features = self._get_features(bbox_xywh, ori_img) 40 | # 从原图中crop bbox对应图片并计算得到embedding 41 | bbox_tlwh = self._xywh_to_tlwh(bbox_xywh) 42 | 43 | detections = [ 44 | Detection(bbox_tlwh[i], conf, features[i]) 45 | for i, conf in enumerate(confidences) if conf > self.min_confidence 46 | ] # 筛选小于min_confidence的目标,并构造一个Detection对象构成的列表 47 | # Detection是一个存储图中一个bbox结果 48 | # 需要:1. bbox(tlwh形式) 2. 对应置信度 3. 对应embedding 49 | 50 | # run on non-maximum supression 51 | boxes = np.array([d.tlwh for d in detections]) 52 | scores = np.array([d.confidence for d in detections]) 53 | 54 | # 使用非极大抑制 55 | # 默认nms_thres=1的时候开启也没有用,实际上并没有进行非极大抑制 56 | indices = non_max_suppression(boxes, self.nms_max_overlap, scores) 57 | detections = [detections[i] for i in indices] 58 | 59 | # update tracker 60 | # tracker给出一个预测结果,然后将detection传入,进行卡尔曼滤波操作 61 | self.tracker.predict() 62 | self.tracker.update(detections) 63 | 64 | # output bbox identities 65 | # 存储结果以及可视化 66 | outputs = [] 67 | for track in self.tracker.tracks: 68 | if not track.is_confirmed() or track.time_since_update > 1: 69 | continue 70 | box = track.to_tlwh() 71 | x1, y1, x2, y2 = self._tlwh_to_xyxy(box) 72 | track_id = track.track_id 73 | outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=np.int)) 74 | 75 | if len(outputs) > 0: 76 | outputs = np.stack(outputs, axis=0) 77 | return np.array(outputs) 78 | 79 | """ 80 | TODO: 81 | Convert bbox from xc_yc_w_h to xtl_ytl_w_h 82 | Thanks JieChen91@github.com for reporting this bug! 83 | """ 84 | 85 | @staticmethod 86 | def _xywh_to_tlwh(bbox_xywh): 87 | bbox_xywh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. 88 | bbox_xywh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. 89 | return bbox_xywh 90 | 91 | def _xywh_to_xyxy(self, bbox_xywh): 92 | x, y, w, h = bbox_xywh 93 | x1 = max(int(x - w / 2), 0) 94 | x2 = min(int(x + w / 2), self.width - 1) 95 | y1 = max(int(y - h / 2), 0) 96 | y2 = min(int(y + h / 2), self.height - 1) 97 | return x1, y1, x2, y2 98 | 99 | def _tlwh_to_xyxy(self, bbox_tlwh): 100 | """ 101 | TODO: 102 | Convert bbox from xtl_ytl_w_h to xc_yc_w_h 103 | Thanks JieChen91@github.com for reporting this bug! 104 | """ 105 | x, y, w, h = bbox_tlwh 106 | x1 = max(int(x), 0) 107 | x2 = min(int(x + w), self.width - 1) 108 | y1 = max(int(y), 0) 109 | y2 = min(int(y + h), self.height - 1) 110 | return x1, y1, x2, y2 111 | 112 | def _get_features(self, bbox_xywh, ori_img): 113 | im_crops = [] 114 | for box in bbox_xywh: 115 | x1, y1, x2, y2 = self._xywh_to_xyxy(box) 116 | im = ori_img[y1:y2, x1:x2] 117 | im_crops.append(im) 118 | if im_crops: 119 | # 在这里调用并提取embedding 120 | features = self.extractor(im_crops) 121 | else: 122 | features = np.array([]) 123 | return features 124 | -------------------------------------------------------------------------------- /deep_sort/sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/deep_sort/sort/__init__.py -------------------------------------------------------------------------------- /deep_sort/sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 
8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | feature : ndarray | NoneType 25 | A feature vector that describes the object contained in this image. 26 | 27 | """ 28 | 29 | def __init__(self, tlwh, confidence, feature): 30 | self.tlwh = np.asarray(tlwh, dtype=np.float) 31 | self.confidence = float(confidence) 32 | self.feature = np.asarray(feature, dtype=np.float32) 33 | 34 | def to_tlbr(self): 35 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 36 | `(top left, bottom right)`. 37 | """ 38 | ret = self.tlwh.copy() 39 | ret[2:] += ret[:2] 40 | return ret 41 | 42 | def to_xyah(self): 43 | """Convert bounding box to format `(center x, center y, aspect ratio, 44 | height)`, where the aspect ratio is `width / height`. 45 | """ 46 | ret = self.tlwh.copy() 47 | ret[:2] += ret[2:] / 2 48 | ret[2] /= ret[3] 49 | return ret 50 | -------------------------------------------------------------------------------- /deep_sort/sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import linear_assignment 4 | ''' 5 | 功能列表 6 | ''' 7 | 8 | 9 | def iou(bbox, candidates): 10 | # 计算iou 11 | """Computer intersection over union. 12 | 13 | Parameters 14 | ---------- 15 | bbox : ndarray 16 | A bounding box in format `(top left x, top left y, width, height)`. 17 | candidates : ndarray 18 | A matrix of candidate bounding boxes (one per row) in the same format 19 | as `bbox`. 20 | 21 | Returns 22 | ------- 23 | ndarray 24 | The intersection over union in [0, 1] between the `bbox` and each 25 | candidate. A higher score means a larger fraction of the `bbox` is 26 | occluded by the candidate. 27 | 28 | """ 29 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 30 | candidates_tl = candidates[:, :2] 31 | candidates_br = candidates[:, :2] + candidates[:, 2:] 32 | 33 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 34 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 35 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 36 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 37 | wh = np.maximum(0., br - tl) 38 | 39 | area_intersection = wh.prod(axis=1) 40 | area_bbox = bbox[2:].prod() 41 | area_candidates = candidates[:, 2:].prod(axis=1) 42 | return area_intersection / (area_bbox + area_candidates - area_intersection) 43 | 44 | 45 | def iou_cost(tracks, detections, track_indices=None, 46 | detection_indices=None): 47 | # 计算track和detection之间的iou距离矩阵 48 | """An intersection over union distance metric. 49 | 50 | Parameters 51 | ---------- 52 | tracks : List[deep_sort.track.Track] 53 | A list of tracks. 54 | detections : List[deep_sort.detection.Detection] 55 | A list of detections. 56 | track_indices : Optional[List[int]] 57 | A list of indices to tracks that should be matched. Defaults to 58 | all `tracks`. 59 | detection_indices : Optional[List[int]] 60 | A list of indices to detections that should be matched. Defaults 61 | to all `detections`. 
62 | 63 | Returns 64 | ------- 65 | ndarray 66 | Returns a cost matrix of shape 67 | len(track_indices), len(detection_indices) where entry (i, j) is 68 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 69 | 70 | """ 71 | if track_indices is None: 72 | track_indices = np.arange(len(tracks)) 73 | if detection_indices is None: 74 | detection_indices = np.arange(len(detections)) 75 | 76 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 77 | for row, track_idx in enumerate(track_indices): 78 | if tracks[track_idx].time_since_update > 1: 79 | cost_matrix[row, :] = linear_assignment.INFTY_COST 80 | continue 81 | 82 | bbox = tracks[track_idx].to_tlwh() 83 | candidates = np.asarray( 84 | [detections[i].tlwh for i in detection_indices]) 85 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 86 | return cost_matrix 87 | -------------------------------------------------------------------------------- /deep_sort/sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | # 计算欧氏距离 6 | def _pdist(a, b): 7 | # 用于计算成对的平方距离 8 | # a NxM 代表N个对象,每个对象有M个数值作为embedding进行比较 9 | # b LxM 代表L个对象,每个对象有M个数值作为embedding进行比较 10 | # 返回的是NxL的矩阵,比如dist[i][j]代表a[i]和b[j]之间的平方和距离 11 | # 实现见:https://blog.csdn.net/frankzd/article/details/80251042 12 | """Compute pair-wise squared distance between points in `a` and `b`. 13 | 14 | Parameters 15 | ---------- 16 | a : array_like 17 | An NxM matrix of N samples of dimensionality M. 18 | b : array_like 19 | An LxM matrix of L samples of dimensionality M. 20 | 21 | Returns 22 | ------- 23 | ndarray 24 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 25 | contains the squared distance between `a[i]` and `b[j]`. 26 | 27 | """ 28 | a, b = np.asarray(a), np.asarray(b) # 拷贝一份数据 29 | if len(a) == 0 or len(b) == 0: 30 | return np.zeros((len(a), len(b))) 31 | a2, b2 = np.square(a).sum(axis=1), np.square( 32 | b).sum(axis=1) # 求每个embedding的平方和 33 | # sum(N) + sum(L) -2 x [NxM]x[MxL] = [NxL] 34 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 35 | r2 = np.clip(r2, 0., float(np.inf)) 36 | return r2 37 | 38 | 39 | def _cosine_distance(a, b, data_is_normalized=False): 40 | # a和b之间的余弦距离 41 | # a : [NxM] b : [LxM] 42 | # 余弦距离 = 1 - 余弦相似度 43 | # https://blog.csdn.net/u013749540/article/details/51813922 44 | """Compute pair-wise cosine distance between points in `a` and `b`. 45 | 46 | Parameters 47 | ---------- 48 | a : array_like 49 | An NxM matrix of N samples of dimensionality M. 50 | b : array_like 51 | An LxM matrix of L samples of dimensionality M. 52 | data_is_normalized : Optional[bool] 53 | If True, assumes rows in a and b are unit length vectors. 54 | Otherwise, a and b are explicitly normalized to lenght 1. 55 | 56 | Returns 57 | ------- 58 | ndarray 59 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 60 | contains the squared distance between `a[i]` and `b[j]`. 61 | 62 | """ 63 | if not data_is_normalized: 64 | # 需要将余弦相似度转化成类似欧氏距离的余弦距离。 65 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 66 | # np.linalg.norm 操作是求向量的范式,默认是L2范式,等同于求向量的欧式距离。 67 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 68 | return 1. - np.dot(a, b.T) 69 | 70 | 71 | def _nn_euclidean_distance(x, y): 72 | # 最近邻欧氏距离 73 | """ Helper function for nearest neighbor distance metric (Euclidean). 74 | 75 | Parameters 76 | ---------- 77 | x : ndarray 78 | A matrix of N row-vectors (sample points). 
79 | y : ndarray 80 | A matrix of M row-vectors (query points). 81 | 82 | Returns 83 | ------- 84 | ndarray 85 | A vector of length M that contains for each entry in `y` the 86 | smallest Euclidean distance to a sample in `x`. 87 | 88 | """ 89 | distances = _pdist(x, y) 90 | return np.maximum(0.0, distances.min(axis=0)) # 找到最小值 91 | 92 | 93 | def _nn_cosine_distance(x, y): 94 | # 最近邻余弦距离 95 | """ Helper function for nearest neighbor distance metric (cosine). 96 | 97 | Parameters 98 | ---------- 99 | x : ndarray 100 | A matrix of N row-vectors (sample points). 101 | y : ndarray 102 | A matrix of M row-vectors (query points). 103 | 104 | Returns 105 | ------- 106 | ndarray 107 | A vector of length M that contains for each entry in `y` the 108 | smallest cosine distance to a sample in `x`. 109 | 110 | """ 111 | distances = _cosine_distance(x, y) 112 | return distances.min(axis=0) 113 | 114 | 115 | class NearestNeighborDistanceMetric(object): 116 | # 对于每个目标,返回一个最近的距离 117 | """ 118 | A nearest neighbor distance metric that, for each target, returns 119 | the closest distance to any sample that has been observed so far. 120 | 121 | Parameters 122 | ---------- 123 | metric : str 124 | Either "euclidean" or "cosine". 125 | matching_threshold: float 126 | The matching threshold. Samples with larger distance are considered an 127 | invalid match. 128 | budget : Optional[int] 129 | If not None, fix samples per class to at most this number. Removes 130 | the oldest samples when the budget is reached. 131 | 132 | Attributes 133 | ---------- 134 | samples : Dict[int -> List[ndarray]] 135 | A dictionary that maps from target identities to the list of samples 136 | that have been observed so far. 137 | 138 | """ 139 | 140 | def __init__(self, metric, matching_threshold, budget=None): 141 | # 默认matching_threshold = 0.2 budge = 100 142 | if metric == "euclidean": 143 | # 使用最近邻欧氏距离 144 | self._metric = _nn_euclidean_distance 145 | elif metric == "cosine": 146 | # 使用最近邻余弦距离 147 | self._metric = _nn_cosine_distance 148 | else: 149 | raise ValueError( 150 | "Invalid metric; must be either 'euclidean' or 'cosine'") 151 | 152 | self.matching_threshold = matching_threshold 153 | # matching_threshold是在级联匹配的函数中调用 154 | self.budget = budget 155 | # budge 预算,控制feature的多少 156 | 157 | self.samples = {} 158 | # samples是一个字典{id->feature list} 159 | 160 | def partial_fit(self, features, targets, active_targets): 161 | # 作用:部分拟合,用新的数据更新测量距离 162 | # 调用:在特征集更新模块部分调用,tracker.update()中 163 | """Update the distance metric with new data. 164 | 165 | Parameters 166 | ---------- 167 | features : ndarray 168 | An NxM matrix of N features of dimensionality M. 169 | targets : ndarray 170 | An integer array of associated target identities. 171 | active_targets : List[int] 172 | A list of targets that are currently present in the scene. 173 | """ 174 | for feature, target in zip(features, targets): 175 | self.samples.setdefault(target, []).append(feature) 176 | # 对应目标下添加新的feature,更新feature集合 177 | # 目标id : feature list 178 | if self.budget is not None: 179 | self.samples[target] = self.samples[target][-self.budget:] 180 | # 设置预算,每个类最多多少个目标,超过直接忽略 181 | 182 | # 筛选激活的目标 183 | self.samples = {k: self.samples[k] for k in active_targets} 184 | 185 | def distance(self, features, targets): 186 | # 作用:比较feature和targets之间的距离,返回一个代价矩阵 187 | # 调用:在匹配阶段,将distance封装为gated_metric, 188 | # 进行外观信息(reid得到的深度特征)+ 189 | # 运动信息(马氏距离用于度量两个分布相似程度) 190 | """Compute distance between features and targets. 
191 | 192 | Parameters 193 | ---------- 194 | features : ndarray 195 | An NxM matrix of N features of dimensionality M. 196 | targets : List[int] 197 | A list of targets to match the given `features` against. 198 | 199 | Returns 200 | ------- 201 | ndarray 202 | Returns a cost matrix of shape len(targets), len(features), where 203 | element (i, j) contains the closest squared distance between 204 | `targets[i]` and `features[j]`. 205 | 206 | """ 207 | cost_matrix = np.zeros((len(targets), len(features))) 208 | for i, target in enumerate(targets): 209 | cost_matrix[i, :] = self._metric(self.samples[target], features) 210 | return cost_matrix 211 | -------------------------------------------------------------------------------- /deep_sort/sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> scores = [d.confidence for d in detections] 19 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 20 | >>> detections = [detections[i] for i in indices] 21 | 22 | Parameters 23 | ---------- 24 | boxes : ndarray 25 | Array of ROIs (x, y, width, height). 26 | max_bbox_overlap : float 27 | ROIs that overlap more than this values are suppressed. 28 | scores : Optional[array_like] 29 | Detector confidence score. 30 | 31 | Returns 32 | ------- 33 | List[int] 34 | Returns indices of detections that have survived non-maxima suppression. 35 | 36 | """ 37 | if len(boxes) == 0: 38 | return [] 39 | 40 | boxes = boxes.astype(np.float) 41 | pick = [] 42 | 43 | x1 = boxes[:, 0] 44 | y1 = boxes[:, 1] 45 | x2 = boxes[:, 2] + boxes[:, 0] 46 | y2 = boxes[:, 3] + boxes[:, 1] 47 | 48 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | if scores is not None: 50 | idxs = np.argsort(scores) 51 | else: 52 | idxs = np.argsort(y2) 53 | 54 | while len(idxs) > 0: 55 | last = len(idxs) - 1 56 | i = idxs[last] 57 | pick.append(i) 58 | 59 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 60 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 61 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 62 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 63 | 64 | w = np.maximum(0, xx2 - xx1 + 1) 65 | h = np.maximum(0, yy2 - yy1 + 1) 66 | 67 | overlap = (w * h) / area[idxs[:last]] 68 | 69 | idxs = np.delete( 70 | idxs, np.concatenate( 71 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 72 | 73 | return pick 74 | -------------------------------------------------------------------------------- /deep_sort/sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 
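    In this implementation a track starts out `Tentative`, is promoted to
    `Confirmed` once it has been matched for `n_init` consecutive frames, and is
    marked `Deleted` either on a miss while still tentative or once it has gone
    more than `max_age` frames without a matched detection.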
11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | # 一个轨迹的信息,包含(x,y,a,h) & v 21 | """ 22 | A single target track with state space `(x, y, a, h)` and associated 23 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 24 | aspect ratio and `h` is the height. 25 | 26 | Parameters 27 | ---------- 28 | mean : ndarray 29 | Mean vector of the initial state distribution. 30 | covariance : ndarray 31 | Covariance matrix of the initial state distribution. 32 | track_id : int 33 | A unique track identifier. 34 | n_init : int 35 | Number of consecutive detections before the track is confirmed. The 36 | track state is set to `Deleted` if a miss occurs within the first 37 | `n_init` frames. 38 | max_age : int 39 | The maximum number of consecutive misses before the track state is 40 | set to `Deleted`. 41 | feature : Optional[ndarray] 42 | Feature vector of the detection this track originates from. If not None, 43 | this feature is added to the `features` cache. 44 | 45 | Attributes 46 | ---------- 47 | mean : ndarray 48 | Mean vector of the initial state distribution. 49 | covariance : ndarray 50 | Covariance matrix of the initial state distribution. 51 | track_id : int 52 | A unique track identifier. 53 | hits : int 54 | Total number of measurement updates. 55 | age : int 56 | Total number of frames since first occurance. 57 | time_since_update : int 58 | Total number of frames since last measurement update. 59 | state : TrackState 60 | The current track state. 61 | features : List[ndarray] 62 | A cache of features. On each measurement update, the associated feature 63 | vector is added to this list. 64 | 65 | """ 66 | 67 | def __init__(self, mean, covariance, track_id, n_init, max_age, 68 | feature=None): 69 | # max age是一个存活期限,默认为70帧 70 | self.mean = mean 71 | self.covariance = covariance 72 | self.track_id = track_id 73 | self.hits = 1 74 | # hits和n_init进行比较 75 | # hits每次update的时候进行一次更新(只有match的时候才进行update) 76 | # hits代表匹配上了多少次,匹配次数超过n_init就会设置为confirmed状态 77 | self.age = 1 # 没有用到,和time_since_update功能重复 78 | self.time_since_update = 0 79 | # 每次调用predict函数的时候就会+1 80 | # 每次调用update函数的时候就会设置为0 81 | 82 | self.state = TrackState.Tentative 83 | self.features = [] 84 | # 每个track对应多个features, 每次更新都将最新的feature添加到列表中 85 | if feature is not None: 86 | self.features.append(feature) 87 | 88 | self._n_init = n_init # 如果连续n_init帧都没有出现匹配,设置为deleted状态 89 | self._max_age = max_age # 上限 90 | 91 | def to_tlwh(self): 92 | """Get current position in bounding box format `(top left x, top left y, 93 | width, height)`. 94 | 95 | Returns 96 | ------- 97 | ndarray 98 | The bounding box. 99 | 100 | """ 101 | ret = self.mean[:4].copy() 102 | ret[2] *= ret[3] 103 | ret[:2] -= ret[2:] / 2 104 | return ret 105 | 106 | def to_tlbr(self): 107 | """Get current position in bounding box format `(min x, miny, max x, 108 | max y)`. 109 | 110 | Returns 111 | ------- 112 | ndarray 113 | The bounding box. 114 | 115 | """ 116 | ret = self.to_tlwh() 117 | ret[2:] = ret[:2] + ret[2:] 118 | return ret 119 | 120 | def predict(self, kf): 121 | # 预测结果 122 | """Propagate the state distribution to the current time step using a 123 | Kalman filter prediction step. 124 | 125 | Parameters 126 | ---------- 127 | kf : kalman_filter.KalmanFilter 128 | The Kalman filter. 
129 | 130 | """ 131 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 132 | self.age += 1 133 | self.time_since_update += 1 134 | 135 | def update(self, kf, detection): 136 | # 将预测结果和观测结果结合 137 | """Perform Kalman filter measurement update step and update the feature 138 | cache. 139 | 140 | Parameters 141 | ---------- 142 | kf : kalman_filter.KalmanFilter 143 | The Kalman filter. 144 | detection : Detection 145 | The associated detection. 146 | """ 147 | self.mean, self.covariance = kf.update( 148 | self.mean, self.covariance, detection.to_xyah()) 149 | self.features.append(detection.feature) 150 | 151 | self.hits += 1 152 | self.time_since_update = 0 153 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 154 | self.state = TrackState.Confirmed 155 | 156 | def mark_missed(self): 157 | # 非常重要 158 | """Mark this track as missed (no association at the current time step). 159 | """ 160 | if self.state == TrackState.Tentative: 161 | self.state = TrackState.Deleted 162 | elif self.time_since_update > self._max_age: 163 | self.state = TrackState.Deleted 164 | 165 | def is_tentative(self): 166 | """Returns True if this track is tentative (unconfirmed). 167 | """ 168 | return self.state == TrackState.Tentative 169 | 170 | def is_confirmed(self): 171 | """Returns True if this track is confirmed.""" 172 | return self.state == TrackState.Confirmed 173 | 174 | def is_deleted(self): 175 | """Returns True if this track is dead and should be deleted.""" 176 | return self.state == TrackState.Deleted 177 | -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_txt=False, save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img 12 | webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | model = Darknet(opt.cfg, img_size) 22 | 23 | # Load weights 24 | attempt_download(weights) 25 | if weights.endswith('.pt'): # pytorch format 26 | model.load_state_dict(torch.load(weights, map_location=device)['model']) 27 | else: # darknet format 28 | _ = load_darknet_weights(model, weights) 29 | 30 | # Second-stage classifier 31 | classify = False 32 | if classify: 33 | modelc = torch_utils.load_classifier(name='resnet101', n=2) # initialize 34 | modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 35 | modelc.to(device).eval() 36 | 37 | # Fuse Conv2d + BatchNorm2d layers 38 | # model.fuse() 39 | 40 | # Eval mode 41 | model.to(device).eval() 42 | 43 | # Export mode 44 | if ONNX_EXPORT: 45 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 46 | torch.onnx.export(model, img, 'weights/export.onnx', verbose=False, opset_version=10) 47 | 48 | # Validate exported model 49 | import onnx 50 | model = 
onnx.load('weights/export.onnx') # Load the ONNX model 51 | onnx.checker.check_model(model) # Check that the IR is well formed 52 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 53 | return 54 | 55 | # Half precision 56 | half = half and device.type != 'cpu' # half precision only supported on CUDA 57 | if half: 58 | model.half() 59 | 60 | # Set Dataloader 61 | vid_path, vid_writer = None, None 62 | if webcam: 63 | view_img = True 64 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 65 | dataset = LoadStreams(source, img_size=img_size, half=half) 66 | else: 67 | save_img = True 68 | dataset = LoadImages(source, img_size=img_size, half=half) 69 | 70 | # Get names and colors 71 | names = load_classes(opt.names) 72 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 73 | 74 | # Run inference 75 | t0 = time.time() 76 | for path, img, im0s, vid_cap in dataset: 77 | t = time.time() 78 | 79 | # Get detections 80 | img = torch.from_numpy(img).to(device) 81 | if img.ndimension() == 3: 82 | img = img.unsqueeze(0) 83 | pred = model(img)[0] 84 | 85 | if opt.half: 86 | pred = pred.float() 87 | 88 | # Apply NMS 89 | pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres) 90 | 91 | # Apply 92 | if classify: 93 | pred = apply_classifier(pred, modelc, img, im0s) 94 | 95 | # Process detections 96 | for i, det in enumerate(pred): # detections per image 97 | if webcam: # batch_size >= 1 98 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 99 | else: 100 | p, s, im0 = path, '', im0s 101 | 102 | save_path = str(Path(out) / Path(p).name) 103 | s += '%gx%g ' % img.shape[2:] # print string 104 | if det is not None and len(det): 105 | # Rescale boxes from img_size to im0 size 106 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 107 | 108 | # Print results 109 | for c in det[:, -1].unique(): 110 | n = (det[:, -1] == c).sum() # detections per class 111 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 112 | 113 | # Write results 114 | for *xyxy, conf, _, cls in det: 115 | if save_txt: # Write to file 116 | with open(save_path + '.txt', 'a') as file: 117 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 118 | 119 | if save_img or view_img: # Add bbox to image 120 | label = '%s %.2f' % (names[int(cls)], conf) 121 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 122 | 123 | print('%sDone. (%.3fs)' % (s, time.time() - t)) 124 | 125 | # Stream results 126 | if view_img: 127 | cv2.imshow(p, im0) 128 | if cv2.waitKey(1) == ord('q'): # q to quit 129 | raise StopIteration 130 | 131 | # Save results (image with detections) 132 | if save_img: 133 | if dataset.mode == 'images': 134 | cv2.imwrite(save_path, im0) 135 | else: 136 | if vid_path != save_path: # new video 137 | vid_path = save_path 138 | if isinstance(vid_writer, cv2.VideoWriter): 139 | vid_writer.release() # release previous video writer 140 | 141 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 142 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 143 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 144 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 145 | vid_writer.write(im0) 146 | 147 | if save_txt or save_img: 148 | print('Results saved to %s' % os.getcwd() + os.sep + out) 149 | if platform == 'darwin': # MacOS 150 | os.system('open ' + out + ' ' + save_path) 151 | 152 | print('Done. 
(%.3fs)' % (time.time() - t0)) 153 | 154 | 155 | if __name__ == '__main__': 156 | parser = argparse.ArgumentParser() 157 | parser.add_argument('--cfg', type=str, default='cfg/yolov3-1cls.cfg', help='*.cfg path') 158 | parser.add_argument('--names', type=str, default='data/voc_small.names', help='*.names path') 159 | parser.add_argument('--weights', type=str, default='weights/yolov3-spp.weights', help='path to weights file') 160 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 161 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 162 | parser.add_argument('--img-size', type=int, default=416, help='inference size (pixels)') 163 | parser.add_argument('--conf-thres', type=float, default=0.5, help='object confidence threshold') 164 | parser.add_argument('--nms-thres', type=float, default=0.5, help='iou threshold for non-maximum suppression') 165 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 166 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 167 | parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu') 168 | parser.add_argument('--view-img', action='store_true', help='display results') 169 | opt = parser.parse_args() 170 | print(opt) 171 | 172 | with torch.no_grad(): 173 | detect() 174 | -------------------------------------------------------------------------------- /eval_mot.py: -------------------------------------------------------------------------------- 1 | # py-motmetrics - Metrics for multiple object tracker (MOT) benchmarking. 2 | # https://github.com/cheind/py-motmetrics/ 3 | # 4 | # MIT License 5 | # Copyright (c) 2017-2020 Christoph Heindl, Jack Valmadre and others. 6 | # See LICENSE file for terms. 7 | """Compute metrics for trackers using MOTChallenge ground-truth data.""" 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import argparse 14 | from collections import OrderedDict 15 | import glob 16 | import logging 17 | import os 18 | from pathlib import Path 19 | 20 | import motmetrics as mm 21 | 22 | 23 | def parse_args(): 24 | """Defines and parses command-line arguments.""" 25 | parser = argparse.ArgumentParser( 26 | description=""" 27 | Compute metrics for trackers using MOTChallenge ground-truth data. 28 | Files 29 | ----- 30 | All file content, ground truth and test files, have to comply with the 31 | format described in 32 | Milan, Anton, et al. 33 | "Mot16: A benchmark for multi-object tracking." 34 | arXiv preprint arXiv:1603.00831 (2016). 35 | https://motchallenge.net/ 36 | Structure 37 | --------- 38 | Layout for ground truth data 39 | //gt/gt.txt 40 | //gt/gt.txt 41 | ... 42 | Layout for test data 43 | /.txt 44 | /.txt 45 | ... 
46 | Sequences of ground truth and test will be matched according to the `` 47 | string.""", 48 | formatter_class=argparse.RawTextHelpFormatter) 49 | 50 | parser.add_argument('--groundtruths', 51 | type=str, 52 | default="./data/videosample", 53 | help='Directory containing ground truth files.') 54 | parser.add_argument('--tests', 55 | type=str, 56 | default="./data/videoresult", 57 | help='Directory containing tracker result files') 58 | parser.add_argument('--loglevel', 59 | type=str, 60 | help='Log level', 61 | default='info') 62 | parser.add_argument('--fmt', type=str, help='Data format', default='mot16') 63 | parser.add_argument('--solver', 64 | type=str, 65 | help='LAP solver to use for matching between frames.') 66 | parser.add_argument( 67 | '--id_solver', 68 | type=str, 69 | help='LAP solver to use for ID metrics. Defaults to --solver.') 70 | parser.add_argument('--exclude_id', 71 | dest='exclude_id', 72 | default=False, 73 | action='store_true', 74 | help='Disable ID metrics') 75 | return parser.parse_args() 76 | 77 | 78 | def compare_dataframes(gts, ts): 79 | """Builds accumulator for each sequence.""" 80 | accs = [] 81 | names = [] 82 | for k, tsacc in ts.items(): 83 | # print(k) 84 | # print(gts) 85 | if k in gts: 86 | logging.info('Comparing %s...', k) 87 | accs.append( 88 | mm.utils.compare_to_groundtruth(gts[k], 89 | tsacc, 90 | 'iou', 91 | distth=0.5)) 92 | names.append(k) 93 | else: 94 | logging.warning('No ground truth for %s, skipping.', k) 95 | 96 | return accs, names 97 | 98 | 99 | if __name__ == '__main__': 100 | # pylint: disable=missing-function-docstring 101 | args = parse_args() 102 | 103 | loglevel = getattr(logging, args.loglevel.upper(), None) 104 | if not isinstance(loglevel, int): 105 | raise ValueError('Invalid log level: {} '.format(args.loglevel)) 106 | logging.basicConfig(level=loglevel, 107 | format='%(asctime)s %(levelname)s - %(message)s', 108 | datefmt='%I:%M:%S') 109 | 110 | if args.solver: 111 | mm.lap.default_solver = args.solver 112 | 113 | gtfiles = glob.glob(os.path.join(args.groundtruths, 'cutout*/gt.txt')) 114 | tsfiles = [ 115 | f for f in glob.glob(os.path.join(args.tests, '*.txt')) 116 | if not os.path.basename(f).startswith('eval') 117 | ] 118 | 119 | # print(gtfiles,'\n\n', tsfiles) 120 | 121 | logging.info('Found %d groundtruths and %d test files.', len(gtfiles), 122 | len(tsfiles)) 123 | logging.info('Available LAP solvers %s', str(mm.lap.available_solvers)) 124 | logging.info('Default LAP solver \'%s\'', mm.lap.default_solver) 125 | logging.info('Loading files.') 126 | 127 | gt = OrderedDict([(Path(f).parts[-2], 128 | mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) 129 | for f in gtfiles]) 130 | ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], 131 | mm.io.loadtxt(f, fmt=args.fmt)) for f in tsfiles]) 132 | 133 | mh = mm.metrics.create() 134 | accs, names = compare_dataframes(gt, ts) 135 | 136 | metrics = list(mm.metrics.motchallenge_metrics) 137 | if args.exclude_id: 138 | metrics = [x for x in metrics if not x.startswith('id')] 139 | 140 | logging.info('Running metrics') 141 | 142 | if args.id_solver: 143 | mm.lap.default_solver = args.id_solver 144 | summary = mh.compute_many(accs, 145 | names=names, 146 | metrics=metrics, 147 | generate_overall=True) 148 | print( 149 | mm.io.render_summary(summary, 150 | formatters=mh.formatters, 151 | namemap=mm.io.motchallenge_metric_names)) 152 | logging.info('Completed') 153 | -------------------------------------------------------------------------------- /miniversion/cow.names: 
-------------------------------------------------------------------------------- 1 | cow 2 | -------------------------------------------------------------------------------- /miniversion/predict.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | ''' 3 | @File : predict.py 4 | @Time : 2019/12/29 16:33:04 5 | @Author : pprp 6 | @Contact : 1115957667@qq.com 7 | @License : (C)Copyright 2018-2019 8 | @Desc : None 9 | ''' 10 | 11 | # here put the import lib 12 | import torch 13 | import time 14 | import cv2 15 | import numpy as np 16 | import os 17 | from PIL import Image 18 | 19 | from models import * 20 | from utils.datasets import * 21 | from utils.utils import * 22 | 23 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 24 | 25 | 26 | class InferYOLOv3(object): 27 | def __init__(self, 28 | cfg, 29 | img_size, 30 | weight_path, 31 | data_cfg, 32 | device, 33 | conf_thres=0.5, 34 | nms_thres=0.5): 35 | self.cfg = cfg 36 | self.img_size = img_size 37 | self.weight_path = weight_path 38 | # self.img_file = img_file 39 | self.device = device 40 | self.model = Darknet(cfg).to(device) 41 | self.model.load_state_dict( 42 | torch.load(weight_path, map_location=device)['model']) 43 | self.model.to(device).eval() 44 | self.classes = load_classes(parse_data_cfg(data_cfg)['names']) 45 | self.colors = [random.randint(0, 255) for _ in range(3)] 46 | self.conf_thres = conf_thres 47 | self.nms_thres = nms_thres 48 | 49 | def predict(self, im0): 50 | # singleDataloader = LoadSingleImages(img_file, img_size=img_size) 51 | # path, img, im0 = singleDataloader.__next__() 52 | 53 | img, _, _ = letterbox(im0, new_shape=self.img_size) 54 | 55 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 56 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 57 | img /= 255.0 58 | 59 | # TODO: how to get img and im0 60 | 61 | img = torch.from_numpy(img).unsqueeze(0).to(self.device) 62 | pred, _ = self.model(img) 63 | det = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] 64 | 65 | if det is not None and len(det) > 0: 66 | # Rescale boxes from 416 to true image size 67 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], 68 | im0.shape).round() 69 | 70 | # Print results to screen 71 | print('%gx%g ' % img.shape[2:], end='') # print image size 72 | for c in det[:, -1].unique(): 73 | n = (det[:, -1] == c).sum() 74 | print('%g %ss' % (n, self.classes[int(c)]), end=', ') 75 | 76 | img = np.array(img.cpu()) 77 | # Draw bounding boxes and labels of detections 78 | 79 | bboxes, confs, cls_confs, cls_ids = [], [], [], [] 80 | 81 | for *xyxy, conf, cls_conf, cls_id in det: 82 | # label = '%s %.2f' % (classes[int(cls_id)], conf) 83 | bboxes.append(xyxy) 84 | confs.append(conf) 85 | cls_confs.append(cls_conf) 86 | cls_ids.append(cls_id) 87 | # plot_one_box(xyxy, im0, label=label, color=colors) 88 | return np.array(bboxes), np.array(cls_confs), np.array(cls_ids) 89 | else: 90 | return None, None, None 91 | 92 | def plot_bbox(self, ori_img, boxes): 93 | img = ori_img 94 | height, width = img.shape[:2] 95 | for box in boxes: 96 | # get x1 x2 x3 x4 97 | x1 = int(round(((box[0] - box[2] / 2.0) * width).item())) 98 | y1 = int(round(((box[1] - box[3] / 2.0) * height).item())) 99 | x2 = int(round(((box[0] + box[2] / 2.0) * width).item())) 100 | y2 = int(round(((box[1] + box[3] / 2.0) * height).item())) 101 | cls_conf = box[5] 102 | cls_id = box[6] 103 | # import random 104 | # color = random.choices(range(256),k=3) 105 | color = [int(x) for x in 
np.random.randint(256, size=3)] 106 | # put texts and rectangles 107 | img = cv2.putText(img, self.class_names[cls_id], (x1, y1), 108 | cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) 109 | img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) 110 | return img 111 | 112 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 113 | # Plots one bounding box on image img 114 | tl = line_thickness or round( 115 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 116 | color = color or [random.randint(0, 255) for _ in range(3)] 117 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 118 | cv2.rectangle(img, c1, c2, color, thickness=tl) 119 | if label: 120 | tf = max(tl - 1, 1) # font thickness 121 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 122 | thickness=tf)[0] 123 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 124 | cv2.rectangle(img, c1, c2, color, -1) # filled 125 | cv2.putText(img, 126 | label, (c1[0], c1[1] - 2), 127 | 0, 128 | tl / 3, [225, 255, 255], 129 | thickness=tf, 130 | lineType=cv2.LINE_AA) 131 | 132 | 133 | if __name__ == "__main__": 134 | ################################################# 135 | cfg = './yolov3-cbam.cfg' 136 | img_size = 416 137 | weight_path = './miniversion/best.pt' 138 | img_file = "./miniversion/test.jpg" #"./images/train2014/0137-2112.jpg" 139 | data_cfg = "./miniversion/dataset1.data" 140 | conf_thres = 0.5 141 | nms_thres = 0.5 142 | device = torch_utils.select_device() 143 | ################################################# 144 | yolo = InferYOLOv3(cfg, img_size, weight_path, data_cfg, device) 145 | # bbox_xcycwh, cls_conf, cls_ids = yolo(img_file) 146 | # print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 147 | 148 | img = cv2.imread(img_file) 149 | print(img.shape) 150 | # im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 151 | im = img 152 | print(im.shape) 153 | bbox_xcycwh, cls_conf, cls_ids = yolo.predict(im) 154 | print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 155 | 156 | bboxs = [] 157 | for i in range(len(bbox_xcycwh)): 158 | bboxs.append(tuple(int(bbox_xcycwh[i][j].tolist()) for j in range(4))) 159 | 160 | print(bboxs) 161 | 162 | -------------------------------------------------------------------------------- /miniversion/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/miniversion/utils/__init__.py -------------------------------------------------------------------------------- /miniversion/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | r = sum([os.system(x) for x in s]) # run commands, get return zeros 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | r = os.system(s) 32 | 33 | # Error check 34 | if r != 0: 35 | os.system('rm ' + name) # remove partial downloads 36 | print('ERROR: Download failure ') 37 | return r 38 | 39 | # Unzip if archive 40 | if name.endswith('.zip'): 41 | print('unzipping... ', end='') 42 | os.system('unzip -q %s' % name) # unzip 43 | os.remove(name) # remove zip to free space 44 | 45 | print('Done (%.1fs)' % (time.time() - t)) 46 | return r 47 | 48 | 49 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 50 | # Uploads a file to a bucket 51 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 52 | 53 | storage_client = storage.Client() 54 | bucket = storage_client.get_bucket(bucket_name) 55 | blob = bucket.blob(destination_blob_name) 56 | 57 | blob.upload_from_filename(source_file_name) 58 | 59 | print('File {} uploaded to {}.'.format( 60 | source_file_name, 61 | destination_blob_name)) 62 | 63 | 64 | def download_blob(bucket_name, source_blob_name, destination_file_name): 65 | # Uploads a blob from a bucket 66 | storage_client = storage.Client() 67 | bucket = storage_client.get_bucket(bucket_name) 68 | blob = bucket.blob(source_blob_name) 69 | 70 | blob.download_to_filename(destination_file_name) 71 | 72 | print('Blob {} downloaded to {}.'.format( 73 | source_blob_name, 74 | destination_file_name)) 75 | -------------------------------------------------------------------------------- /miniversion/utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): # add cfg/ prefix if omitted 11 | path = 'cfg' + os.sep + path 12 | 13 | with open(path, 'r') as f: 14 | lines = f.read().split('\n') 15 | lines = [x for x in lines if x and not x.startswith('#')] 16 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 17 | mdefs = [] # module definitions 18 | for line in lines: 19 | if line.startswith('['): # This marks the start of a new block 20 | mdefs.append({}) 21 | mdefs[-1]['type'] = line[1:-1].rstrip() 22 | if mdefs[-1]['type'] == 'convolutional': 23 | mdefs[-1]['batch_normalize'] = 0 # pre-populate with zeros (may be overwritten later) 24 | else: 25 | key, val = line.split("=") 26 | key = key.rstrip() 27 | 28 | if 'anchors' in key: 29 | mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2)) # np anchors 30 | else: 31 | mdefs[-1][key] = val.strip() 32 | 33 | # Check all fields are supported 34 | supported = ['type', 'batch_normalize', 'filters', 'size', 
'stride', 'pad', 'activation', 'layers', 'groups', 35 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 36 | 'stride_x', 'stride_y'] 37 | 38 | f = [] # fields 39 | for x in mdefs[1:]: 40 | [f.append(k) for k in x if k not in f] 41 | u = [x for x in f if x not in supported] # unsupported fields 42 | assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 43 | 44 | return mdefs 45 | 46 | 47 | def parse_data_cfg(path): 48 | # Parses the data configuration file 49 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): # add data/ prefix if omitted 50 | path = 'data' + os.sep + path 51 | 52 | with open(path, 'r') as f: 53 | lines = f.readlines() 54 | 55 | options = dict() 56 | for line in lines: 57 | line = line.strip() 58 | if line == '' or line.startswith('#'): 59 | continue 60 | key, val = line.split('=') 61 | options[key.strip()] = val.strip() 62 | 63 | return options 64 | -------------------------------------------------------------------------------- /miniversion/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False, batch_size=None): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 29 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 31 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 32 | for i in range(0, ng): 33 | if i == 1: 34 | s = ' ' * len(s) 35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 36 | (s, i, x[i].name, x[i].total_memory / c)) 37 | else: 38 | print('Using CPU') 39 | 40 | print('') # skip a line 41 | return torch.device('cuda:0' if cuda else 'cpu') 42 | 43 | 44 | def fuse_conv_and_bn(conv, bn): 45 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 46 | with torch.no_grad(): 47 | # init 48 | fusedconv = torch.nn.Conv2d(conv.in_channels, 49 | conv.out_channels, 50 | kernel_size=conv.kernel_size, 51 | stride=conv.stride, 52 | padding=conv.padding, 53 | bias=True) 54 | 55 | # prepare filters 56 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 57 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 58 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 59 | 60 | # prepare spatial bias 61 | if conv.bias is not None: 62 | b_conv = conv.bias 63 | else: 64 | b_conv = 
torch.zeros(conv.weight.size(0)) 65 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 66 | fusedconv.bias.copy_(b_conv + b_bn) 67 | 68 | return fusedconv 69 | 70 | 71 | def model_info(model, report='summary'): 72 | # Plots a line-by-line description of a PyTorch model 73 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 74 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 75 | if report is 'full': 76 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 77 | for i, (name, p) in enumerate(model.named_parameters()): 78 | name = name.replace('module_list.', '') 79 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 80 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 81 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 82 | 83 | 84 | def load_classifier(name='resnet101', n=2): 85 | # Loads a pretrained model reshaped to n-class output 86 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 87 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 88 | 89 | # Display model properties 90 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 91 | print(x + ' =', eval(x)) 92 | 93 | # Reshape output to n classes 94 | filters = model.last_linear.weight.shape[1] 95 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 96 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 97 | model.last_linear.out_features = n 98 | return model 99 | 100 | 101 | from collections import defaultdict 102 | from torch.optim import Optimizer 103 | 104 | 105 | class Lookahead(Optimizer): 106 | def __init__(self, optimizer, k=5, alpha=0.5): 107 | self.optimizer = optimizer 108 | self.k = k 109 | self.alpha = alpha 110 | self.param_groups = self.optimizer.param_groups 111 | self.state = defaultdict(dict) 112 | self.fast_state = self.optimizer.state 113 | for group in self.param_groups: 114 | group["counter"] = 0 115 | 116 | def update(self, group): 117 | for fast in group["params"]: 118 | param_state = self.state[fast] 119 | if "slow_param" not in param_state: 120 | param_state["slow_param"] = torch.zeros_like(fast.data) 121 | param_state["slow_param"].copy_(fast.data) 122 | slow = param_state["slow_param"] 123 | slow += (fast.data - slow) * self.alpha 124 | fast.data.copy_(slow) 125 | 126 | def update_lookahead(self): 127 | for group in self.param_groups: 128 | self.update(group) 129 | 130 | def step(self, closure=None): 131 | loss = self.optimizer.step(closure) 132 | for group in self.param_groups: 133 | if group["counter"] == 0: 134 | self.update(group) 135 | group["counter"] += 1 136 | if group["counter"] >= self.k: 137 | group["counter"] = 0 138 | return loss 139 | 140 | def state_dict(self): 141 | fast_state_dict = self.optimizer.state_dict() 142 | slow_state = { 143 | (id(k) if isinstance(k, torch.Tensor) else k): v 144 | for k, v in self.state.items() 145 | } 146 | fast_state = fast_state_dict["state"] 147 | param_groups = fast_state_dict["param_groups"] 148 | return { 149 | "fast_state": fast_state, 150 | "slow_state": slow_state, 151 | "param_groups": param_groups, 152 | } 153 | 154 | def load_state_dict(self, state_dict): 155 | slow_state_dict = { 156 | "state": state_dict["slow_state"], 157 | "param_groups": 
state_dict["param_groups"], 158 | } 159 | fast_state_dict = { 160 | "state": state_dict["fast_state"], 161 | "param_groups": state_dict["param_groups"], 162 | } 163 | super(Lookahead, self).load_state_dict(slow_state_dict) 164 | self.optimizer.load_state_dict(fast_state_dict) 165 | self.fast_state = self.optimizer.state 166 | 167 | def add_param_group(self, param_group): 168 | param_group["counter"] = 0 169 | self.optimizer.add_param_group(param_group) 170 | -------------------------------------------------------------------------------- /miniversion/utils/utils_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 | (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | # def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | # ''' 19 | # draw box of an id 20 | # ''' 21 | # x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # # set color and label text 23 | # color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | # label = '{} {}'.format(cls_name, identity) 25 | # # box text and bar 26 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | # return img 31 | 32 | 33 | def plot_one_box(x, ori_img, color=None, label=None, line_thickness=None): 34 | # Plots one bounding box on image img 35 | img = ori_img 36 | tl = line_thickness or round( 37 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 38 | color = color or [random.randint(0, 255) for _ in range(3)] 39 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 40 | cv2.rectangle(img, c1, c2, color, thickness=tl) 41 | if label: 42 | tf = max(tl - 1, 1) # font thickness 43 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 44 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 45 | cv2.rectangle(img, c1, c2, color, -1) # filled 46 | cv2.putText(img, 47 | label, (c1[0], c1[1] - 2), 48 | 0, 49 | tl / 3, [225, 255, 255], 50 | thickness=tf, 51 | lineType=cv2.LINE_AA) 52 | return img 53 | 54 | 55 | ''' 56 | deep sort 中的画图方法,在原图上进行作画 57 | ''' 58 | def draw_bboxes(ori_img, bbox, identities=None, offset=(0,0)): 59 | img = ori_img 60 | for i,box in enumerate(bbox): 61 | x1,y1,x2,y2 = [int(i) for i in box] 62 | x1 
+= offset[0] 63 | x2 += offset[0] 64 | y1 += offset[1] 65 | y2 += offset[1] 66 | # box text and bar 67 | id = int(identities[i]) if identities is not None else 0 68 | color = COLORS_10[id%len(COLORS_10)] 69 | label = '{}{:d}'.format("", id) 70 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 71 | img = plot_one_box([x1,y1,x2,y2], img, color, label) 72 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 73 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 74 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 75 | return img 76 | 77 | 78 | 79 | 80 | 81 | def softmax(x): 82 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 83 | x_exp = np.exp(x*5) 84 | return x_exp/x_exp.sum() 85 | 86 | def softmin(x): 87 | assert isinstance(x, np.ndarray), "expect x be a numpy array" 88 | x_exp = np.exp(-x) 89 | return x_exp/x_exp.sum() 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | x = np.arange(10)/10. 95 | x = np.array([0.5,0.5,0.5,0.6,1.]) 96 | y = softmax(x) 97 | z = softmin(x) 98 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /miniversion/utils/visdom.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import time 3 | import numpy as np 4 | 5 | 6 | class Visualizer(object): 7 | def __init__(self, env='default', **kwargs): 8 | self.vis = visdom.Visdom(env=env, **kwargs) 9 | self.index = {} 10 | 11 | def plot_many_stack(self, d): 12 | ''' 13 | self.plot('loss',1.00) 14 | ''' 15 | name = list(d.keys()) 16 | name_total = " ".join(name) 17 | x = self.index.get(name_total, 0) 18 | val = list(d.values()) 19 | if len(val) == 1: 20 | y = np.array(val) 21 | else: 22 | y = np.array(val).reshape(-1, len(val)) 23 | # print(x) 24 | self.vis.line( 25 | Y=y, 26 | X=np.ones(y.shape) * x, 27 | win=str(name_total), # unicode 28 | opts=dict(legend=name, title=name_total), 29 | update=None if x == 0 else 'append') 30 | self.index[name_total] = x + 1 -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | import cv2 4 | import numpy as np 5 | import os 6 | from PIL import Image 7 | 8 | from models import * 9 | from utils.datasets import * 10 | from utils.utils import * 11 | 12 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 13 | 14 | 15 | class InferYOLOv3(object): 16 | def __init__(self, 17 | cfg, 18 | img_size, 19 | weight_path, 20 | data_cfg, 21 | device, 22 | conf_thres=0.5, 23 | nms_thres=0.5): 24 | self.cfg = cfg 25 | self.img_size = img_size 26 | self.weight_path = weight_path 27 | # self.img_file = img_file 28 | self.device = device 29 | self.model = Darknet(cfg).to(device) 30 | self.model.load_state_dict( 31 | torch.load(weight_path, map_location=device)['model']) 32 | self.model.to(device).eval() 33 | self.classes = load_classes(parse_data_cfg(data_cfg)['names']) 34 | self.colors = [random.randint(0, 255) for _ in range(3)] 35 | self.conf_thres = conf_thres 36 | self.nms_thres = nms_thres 37 | 38 | def predict(self, im0): 39 | # singleDataloader = LoadSingleImages(img_file, img_size=img_size) 40 | # path, img, im0 = singleDataloader.__next__() 41 | 42 | img, _, _ = letterbox(im0, new_shape=self.img_size) 43 | 44 | img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB 45 | img = np.ascontiguousarray(img, dtype=np.float32) # uint8 to float32 46 | img 
/= 255.0 47 | 48 | # TODO: how to get img and im0 49 | 50 | img = torch.from_numpy(img).unsqueeze(0).to(self.device) 51 | pred, _ = self.model(img) 52 | det = non_max_suppression(pred, self.conf_thres, self.nms_thres)[0] 53 | 54 | if det is not None and len(det) > 0: 55 | # Rescale boxes from 416 to true image size 56 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], 57 | im0.shape).round() 58 | 59 | # Print results to screen 60 | # print('%gx%g ' % img.shape[2:], end='') # print image size 61 | for c in det[:, -1].unique(): 62 | n = (det[:, -1] == c).sum() 63 | # print('%g %ss' % (n, self.classes[int(c)]), end=', ') 64 | 65 | img = np.array(img.cpu()) 66 | # Draw bounding boxes and labels of detections 67 | 68 | bboxes, confs, cls_confs, cls_ids = [], [], [], [] 69 | 70 | for *xyxy, conf, cls_conf, cls_id in det: 71 | # label = '%s %.2f' % (classes[int(cls_id)], conf) 72 | bboxes.append(xyxy) 73 | confs.append(conf) 74 | cls_confs.append(cls_conf) 75 | cls_ids.append(cls_id) 76 | # plot_one_box(xyxy, im0, label=label, color=colors) 77 | return np.array(bboxes), np.array(cls_confs), np.array(cls_ids) 78 | else: 79 | return None, None, None 80 | 81 | def plot_bbox(self, ori_img, boxes): 82 | img = ori_img 83 | height, width = img.shape[:2] 84 | for box in boxes: 85 | # get x1 x2 x3 x4 86 | x1 = int(round(((box[0] - box[2] / 2.0) * width).item())) 87 | y1 = int(round(((box[1] - box[3] / 2.0) * height).item())) 88 | x2 = int(round(((box[0] + box[2] / 2.0) * width).item())) 89 | y2 = int(round(((box[1] + box[3] / 2.0) * height).item())) 90 | cls_conf = box[5] 91 | cls_id = box[6] 92 | # import random 93 | # color = random.choices(range(256),k=3) 94 | color = [int(x) for x in np.random.randint(256, size=3)] 95 | # put texts and rectangles 96 | img = cv2.putText(img, self.class_names[cls_id], (x1, y1), 97 | cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2) 98 | img = cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) 99 | return img 100 | 101 | def plot_one_box(x, img, color=None, label=None, line_thickness=None): 102 | # Plots one bounding box on image img 103 | tl = line_thickness or round( 104 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 105 | color = color or [random.randint(0, 255) for _ in range(3)] 106 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 107 | cv2.rectangle(img, c1, c2, color, thickness=tl) 108 | if label: 109 | tf = max(tl - 1, 1) # font thickness 110 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 111 | thickness=tf)[0] 112 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 113 | cv2.rectangle(img, c1, c2, color, -1) # filled 114 | cv2.putText(img, 115 | label, (c1[0], c1[1] - 2), 116 | 0, 117 | tl / 3, [225, 255, 255], 118 | thickness=tf, 119 | lineType=cv2.LINE_AA) 120 | 121 | 122 | if __name__ == "__main__": 123 | ################################################# 124 | cfg = './cfg/yolov3-1cls.cfg' 125 | img_size = 416 126 | weight_path = './weights/best.pt' 127 | img_file = "/home/dongpeijie/datasets/data_with_labelimg/images/train2014/0137-1162.jpg" 128 | data_cfg = "./data/voc_small.data" 129 | conf_thres = 0.5 130 | nms_thres = 0.5 131 | device = torch_utils.select_device() 132 | ################################################# 133 | yolo = InferYOLOv3(cfg, img_size, weight_path, data_cfg, device) 134 | # bbox_xcycwh, cls_conf, cls_ids = yolo(img_file) 135 | # print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 136 | 137 | img = cv2.imread(img_file) 138 | im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 139 | im = img 140 | print(im.shape) 141 | 
bbox_xcycwh, cls_conf, cls_ids = yolo.predict(im) 142 | print(bbox_xcycwh.shape, cls_conf.shape, cls_ids.shape) 143 | -------------------------------------------------------------------------------- /sort.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import time 4 | import argparse 5 | import torch 6 | import numpy as np 7 | 8 | from predict import InferYOLOv3 9 | from utils.utils import xyxy2xywh 10 | from deep_sort import DeepSort 11 | from utils.utils_sort import COLORS_10, draw_bboxes 12 | from sort.sort import * 13 | 14 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 15 | 16 | 17 | class Detector(object): 18 | def __init__(self, args): 19 | self.args = args 20 | if args.display: 21 | cv2.namedWindow("test", cv2.WINDOW_NORMAL) 22 | cv2.resizeWindow("test", args.display_width, args.display_height) 23 | device = torch.device( 24 | 'cuda') if torch.cuda.is_available() else torch.device('cpu') 25 | self.vdo = cv2.VideoCapture() 26 | self.yolo3 = InferYOLOv3(args.yolo_cfg, 27 | args.img_size, 28 | args.yolo_weights, 29 | args.data_cfg, 30 | device, 31 | conf_thres=args.conf_thresh, 32 | nms_thres=args.nms_thresh) 33 | # self.deepsort = DeepSort(args.deepsort_checkpoint) 34 | self.mot_tracker_sort = Sort() 35 | self.class_names = self.yolo3.classes 36 | 37 | def __enter__(self): 38 | assert os.path.isfile(self.args.VIDEO_PATH), "Error: path error" 39 | self.vdo.open(self.args.VIDEO_PATH) 40 | self.im_width = int(self.vdo.get(cv2.CAP_PROP_FRAME_WIDTH)) 41 | self.im_height = int(self.vdo.get(cv2.CAP_PROP_FRAME_HEIGHT)) 42 | 43 | if self.args.save_path: 44 | fourcc = cv2.VideoWriter_fourcc(*'MJPG') 45 | self.output = cv2.VideoWriter(self.args.save_path, fourcc, 20, 46 | (self.im_width, self.im_height)) 47 | 48 | assert self.vdo.isOpened() 49 | return self 50 | 51 | def __exit__(self, exc_type, exc_value, exc_traceback): 52 | if exc_type: 53 | print(exc_type, exc_value, exc_traceback) 54 | 55 | def detect(self): 56 | frame_cnt = -1 57 | while self.vdo.grab(): 58 | frame_cnt += 1 59 | start = time.time() 60 | _, ori_im = self.vdo.retrieve() 61 | # im = cv2.cvtColor(ori_im, cv2.COLOR_BGR2RGB) 62 | im = ori_im 63 | 64 | t1_begin = time.time() 65 | bbox_xxyy, cls_conf, cls_ids = self.yolo3.predict(im) 66 | t1_end = time.time() 67 | 68 | t2_begin = time.time() 69 | if bbox_xxyy is not None: 70 | # select class cow 71 | # mask = cls_ids == 0 72 | # bbox_xxyy = bbox_xxyy[mask] 73 | 74 | # bbox_xxyy[:, 3:] *= 1.2 75 | # cls_conf = cls_conf[mask] 76 | 77 | # bbox_xcycwh = bbox_xxyy 78 | # print(" "*10, bbox_xcycwh.shape, cls_conf.shape) 79 | detections = [] 80 | for i in range(len(bbox_xxyy)): 81 | # print(bbox_xxyy[i][0].item(), bbox_xxyy[i][1].item(), 82 | # bbox_xxyy[i][2].item(), bbox_xxyy[i][3].item(), 83 | # cls_conf[i].tolist()) 84 | detections.append([ 85 | bbox_xxyy[i][0].item(), bbox_xxyy[i][1].item(), 86 | bbox_xxyy[i][2].item(), bbox_xxyy[i][3].item(), 87 | cls_conf[i].tolist() 88 | ]) 89 | # detections.append([*bbox_xcycwh[i].tolist(), cls_conf[i].tolist()]) 90 | # print("=" * 30, [*bbox_xcycwh[i], cls_conf[i]]) 91 | # print('-'*30, detections) 92 | detections = torch.tensor(detections) 93 | outputs = self.mot_tracker_sort.update(detections) 94 | if len(outputs) > 0: 95 | bbox_xyxy = outputs[:, :4] 96 | identities = outputs[:, -1] 97 | ori_im = draw_bboxes(ori_im, bbox_xyxy, identities) 98 | t2_end = time.time() 99 | 100 | end = time.time() 101 | print( 102 | "frame:%d|det:%.4f|sort:%.4f|total:%.4f|det p:%.2f%%|fps:%.2f" 103 
| % (frame_cnt, (t1_end - t1_begin), (t2_end - t2_begin), 104 | (end - start), ((t1_end - t1_begin) * 100 / 105 | ((end - start))), (1 / (end - start)))) 106 | if self.args.display: 107 | cv2.imshow("test", ori_im) 108 | cv2.waitKey(1) 109 | 110 | if self.args.save_path: 111 | self.output.write(ori_im) 112 | 113 | 114 | def parse_args(): 115 | parser = argparse.ArgumentParser() 116 | parser.add_argument("VIDEO_PATH", type=str) 117 | parser.add_argument("--yolo_cfg", 118 | type=str, 119 | default="cfg/yolov3-1cls.cfg") 120 | parser.add_argument("--yolo_weights", 121 | type=str, 122 | default="./weights/best.pt") 123 | parser.add_argument("--yolo_names", 124 | type=str, 125 | default="cfg/coco.names") 126 | parser.add_argument("--conf_thresh", type=float, default=0.5) 127 | parser.add_argument("--nms_thresh", type=float, default=0.4) 128 | parser.add_argument("--deepsort_checkpoint", 129 | type=str, 130 | default="deep_sort/deep/checkpoint/best.pt") 131 | parser.add_argument("--max_dist", type=float, default=0.2) 132 | parser.add_argument("--ignore_display", 133 | dest="display", 134 | action="store_false") 135 | parser.add_argument("--display_width", type=int, default=800) 136 | parser.add_argument("--display_height", type=int, default=600) 137 | parser.add_argument("--save_path", type=str, default="demo.avi") 138 | parser.add_argument("--data_cfg", 139 | type=str, 140 | default="data/voc_small.data") 141 | parser.add_argument("--img_size", type=int, default=416, help="img size") 142 | 143 | return parser.parse_args() 144 | 145 | 146 | if __name__ == "__main__": 147 | args = parse_args() 148 | with Detector(args) as det: 149 | det.detect() 150 | 151 | os.system("ffmpeg -y -i demo.avi -r 10 -b:a 32k %s_output.mp4" % 152 | (os.path.basename(args.VIDEO_PATH).split('.')[0])) 153 | -------------------------------------------------------------------------------- /sort/README.md: -------------------------------------------------------------------------------- 1 | SORT 2 | ===== 3 | 4 | A simple online and realtime tracking algorithm for 2D multiple object tracking in video sequences. 5 | See an example [video here](https://motchallenge.net/movies/ETH-Linthescher-SORT.mp4). 6 | 7 | By Alex Bewley 8 | 9 | ### Introduction 10 | 11 | SORT is a barebones implementation of a visual multiple object tracking framework based on rudimentary data association and state estimation techniques. It is designed for online tracking applications where only past and current frames are available and the method produces object identities on the fly. While this minimalistic tracker doesn't handle occlusion or re-entering objects its purpose is to serve as a baseline and testbed for the development of future trackers. 12 | 13 | SORT was initially described in an [arXiv tech report](http://arxiv.org/abs/1602.00763). At the time of the initial publication, SORT was ranked the best *open source* multiple object tracker on the [MOT benchmark](https://motchallenge.net/results/2D_MOT_2015/). 14 | 15 | This code has been tested on Mac OSX 10.10, and Ubuntu 14.04, with Python 2.7 (anaconda). 16 | 17 | **Note:** A significant proportion of SORT's accuracy is attributed to the detections. 18 | For your convenience, this repo also contains *Faster* RCNN detections for the MOT benchmark sequences in the [benchmark format](https://motchallenge.net/instructions/). 
To run the detector yourself please see the original [*Faster* RCNN project](https://github.com/ShaoqingRen/faster_rcnn) or the python reimplementation of [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn) by Ross Girshick. 19 | 20 | **Also see:** 21 | A new and improved version of SORT with a Deep Association Metric implemented in tensorflow is available at [https://github.com/nwojke/deep_sort](https://github.com/nwojke/deep_sort) . 22 | 23 | ### License 24 | 25 | SORT is released under the GPL License (refer to the LICENSE file for details) to promote the open use of the tracker and future improvements. If you require a permissive license contact Alex (alex@bewley.ai). 26 | 27 | ### Citing SORT 28 | 29 | If you find this repo useful in your research, please consider citing: 30 | 31 | @inproceedings{Bewley2016_sort, 32 | author={Bewley, Alex and Ge, Zongyuan and Ott, Lionel and Ramos, Fabio and Upcroft, Ben}, 33 | booktitle={2016 IEEE International Conference on Image Processing (ICIP)}, 34 | title={Simple online and realtime tracking}, 35 | year={2016}, 36 | pages={3464-3468}, 37 | keywords={Benchmark testing;Complexity theory;Detectors;Kalman filters;Target tracking;Visualization;Computer Vision;Data Association;Detection;Multiple Object Tracking}, 38 | doi={10.1109/ICIP.2016.7533003} 39 | } 40 | 41 | 42 | ### Dependencies: 43 | 44 | This code makes use of the following packages: 45 | 1. [`scikit-learn`](http://scikit-learn.org/stable/) 46 | 0. [`scikit-image`](http://scikit-image.org/download) 47 | 0. [`FilterPy`](https://github.com/rlabbe/filterpy) 48 | 49 | To install required dependencies run: 50 | ``` 51 | $ pip install -r requirements.txt 52 | ``` 53 | 54 | 55 | ### Demo: 56 | 57 | To run the tracker with the provided detections: 58 | 59 | ``` 60 | $ cd path/to/sort 61 | $ python sort.py 62 | ``` 63 | 64 | To display the results you need to: 65 | 66 | 0. Download the [2D MOT 2015 benchmark dataset](https://motchallenge.net/data/2D_MOT_2015/#download) 67 | 0. Create a symbolic link to the dataset 68 | ``` 69 | $ ln -s /path/to/MOT2015_challenge/data/2DMOT2015 mot_benchmark 70 | ``` 71 | 0. Run the demo with the ```--display``` flag 72 | ``` 73 | $ python sort.py --display 74 | ``` 75 | 76 | 77 | ### Main Results 78 | 79 | Using the [MOT challenge devkit](https://motchallenge.net/devkit/) the method produces the following results (as described in the paper). 80 | 81 | Sequence | Rcll | Prcn | FAR | GT MT PT ML| FP FN IDs FM| MOTA MOTP MOTAL 82 | --------------- |:----:|:----:|:----:|:-------------:|:-------------------:|:------------------: 83 | TUD-Campus | 68.5 | 94.3 | 0.21 | 8 6 2 0| 15 113 6 9| 62.7 73.7 64.1 84 | ETH-Sunnyday | 77.5 | 81.9 | 0.90 | 30 11 16 3| 319 418 22 54| 59.1 74.4 60.3 85 | ETH-Pedcross2 | 51.9 | 90.8 | 0.39 | 133 17 60 56| 330 3014 77 103| 45.4 74.8 46.6 86 | ADL-Rundle-8 | 44.3 | 75.8 | 1.47 | 28 6 16 6| 959 3781 103 211| 28.6 71.1 30.1 87 | Venice-2 | 42.5 | 64.8 | 2.75 | 26 7 9 10| 1650 4109 57 106| 18.6 73.4 19.3 88 | KITTI-17 | 67.1 | 92.3 | 0.26 | 9 1 8 0| 38 225 9 16| 60.2 72.3 61.3 89 | *Overall* | 49.5 | 77.5 | 1.24 | 234 48 111 75| 3311 11660 274 499| 34.0 73.3 35.1 90 | 91 | 92 | ### Using SORT in your own project 93 | 94 | Below is the gist of how to instantiate and update SORT. See the ['__main__'](https://github.com/abewley/sort/blob/master/sort.py#L239) section of [sort.py](https://github.com/abewley/sort/blob/master/sort.py#L239) for a complete example. 
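Before that gist, here is a self-contained sketch of the same loop as it is used in the surrounding deep_sort_yolov3_pytorch project, where detections come from the YOLOv3 wrapper (`InferYOLOv3.predict` in `predict.py`) as `(x1, y1, x2, y2)` boxes plus confidences. The detector call and the numbers below are illustrative only; the contract that matters is that `Sort.update()` receives one `[x1, y1, x2, y2, score]` row per detection and returns rows whose last column is the track id:

```
import numpy as np
from sort import Sort

mot_tracker = Sort()  # default max_age / min_hits

# Pretend these came from the detector for one frame
# (in this repo: bbox, cls_conf, cls_id = InferYOLOv3.predict(frame)).
boxes = np.array([[100., 80., 180., 220.],   # x1, y1, x2, y2
                  [300., 60., 380., 200.]])
scores = np.array([0.92, 0.87])

# Sort.update() expects an (N, 5) array: [x1, y1, x2, y2, score] per row.
detections = np.hstack([boxes, scores[:, None]])

tracks = mot_tracker.update(detections)
# Each returned row is [x1, y1, x2, y2, track_id].
for x1, y1, x2, y2, track_id in tracks:
    print('track %d: (%.0f, %.0f, %.0f, %.0f)' % (track_id, x1, y1, x2, y2))
```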
95 | 96 | from sort import * 97 | 98 | # create instance of SORT 99 | mot_tracker = Sort() 100 | 101 | # get detections 102 | ... 103 | 104 | # update SORT 105 | track_bbs_ids = mot_tracker.update(detections) 106 | 107 | # track_bbs_ids is a np array where each row contains a valid bounding box and track_id (last column) 108 | ... 109 | 110 | 111 | -------------------------------------------------------------------------------- /sort/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/sort/__init__.py -------------------------------------------------------------------------------- /sort/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy 2 | filterpy==1.4.1 3 | numba==0.38.1 4 | scikit-image==0.14.0 5 | scikit-learn==0.19.1 6 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pprp/deep_sort_yolov3_pytorch/f6d3f134b7007d393588ccbfde6f460111c316ae/utils/__init__.py -------------------------------------------------------------------------------- /utils/anchor_cluster.py: -------------------------------------------------------------------------------- 1 | #coding=utf-8 2 | import xml.etree.ElementTree as ET 3 | import numpy as np 4 | import glob 5 | 6 | 7 | def iou(box, clusters): 8 | """ 9 | Compute the intersection-over-union (IOU) between one ground truth box and the k prior (anchor) boxes. 10 | box: tuple or array holding the width and height of the ground truth box. 11 | clusters: numpy array of shape (k, 2), where k is the number of anchor clusters. 12 | Returns: the IOU between the ground truth box and each anchor. 13 | """ 14 | x = np.minimum(clusters[:, 0], box[0]) 15 | y = np.minimum(clusters[:, 1], box[1]) 16 | if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0: 17 | raise ValueError("Box has no area") 18 | intersection = x * y 19 | box_area = box[0] * box[1] 20 | cluster_area = clusters[:, 0] * clusters[:, 1] 21 | iou_ = intersection / (box_area + cluster_area - intersection) 22 | return iou_ 23 | 24 | 25 | def avg_iou(boxes, clusters): 26 | """ 27 | Compute the mean of the best IOU between each ground truth box and the k anchors. 28 | """ 29 | return np.mean( 30 | [np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])]) 31 | 32 | 33 | def kmeans(boxes, k, dist=np.median): 34 | """ 35 | K-means clustering driven by the IOU metric. 36 | boxes: ground truth boxes of shape (r, 2), where r is the number of ground truth boxes. 37 | k: number of anchors. 38 | dist: aggregation function used to update the cluster centres. 39 | Returns: the k anchor boxes as an array of shape (k, 2). 40 | """ 41 | # r mentioned above, i.e. the number of ground truth boxes 42 | rows = boxes.shape[0] 43 | # distance matrix: distance from each ground truth box to the k anchors 44 | distances = np.empty((rows, k)) 45 | # index of the nearest anchor for each ground truth box in the previous iteration 46 | last_clusters = np.zeros((rows, )) 47 | # seed the random number generator 48 | np.random.seed() 49 | 50 | # initialize the k cluster centres by randomly picking k of the r ground truth boxes 51 | clusters = boxes[np.random.choice(rows, k, replace=False)] 52 | # start clustering 53 | while True: 54 | # distance from each ground truth box to the k anchors, computed as 1 - IOU(box, anchor) 55 | for row in range(rows): 56 | distances[row] = 1 - iou(boxes[row], clusters) 57 | # for each ground truth box, pick the closest anchor and store its index 58 | nearest_clusters = np.argmin(distances, axis=1) 59 | # stop when the nearest-anchor assignment no longer changes 60 | if (last_clusters == nearest_clusters).all(): 61 | break 62 | # update each cluster centre with dist (median by default) of the boxes assigned to it 63 | for cluster in range(k): 64 | clusters[cluster] = dist(boxes[nearest_clusters == cluster], 65 | axis=0) 66 | # remember the current assignment for the next iteration 67 | last_clusters = nearest_clusters 68 | 69 | return clusters 70 | 71 | 72 | # 
Load your own dataset: all that is needed are the xml annotation files produced by labelImg 73 | def load_dataset(path): 74 | dataset = [] 75 | for xml_file in glob.glob("{}/*xml".format(path)): 76 | tree = ET.parse(xml_file) 77 | # image height 78 | height = int(tree.findtext("./size/height")) 79 | # image width 80 | width = int(tree.findtext("./size/width")) 81 | 82 | for obj in tree.iter("object"): 83 | # normalize the box coordinates by the image size 84 | xmin = int(obj.findtext("bndbox/xmin")) / width 85 | ymin = int(obj.findtext("bndbox/ymin")) / height 86 | xmax = int(obj.findtext("bndbox/xmax")) / width 87 | ymax = int(obj.findtext("bndbox/ymax")) / height 88 | xmin = np.float64(xmin) 89 | ymin = np.float64(ymin) 90 | xmax = np.float64(xmax) 91 | ymax = np.float64(ymax) 92 | if xmax == xmin or ymax == ymin: 93 | print(xml_file) # report degenerate boxes 94 | # store the box width and height in dataset; kmeans is run on these to obtain the anchors 95 | dataset.append([xmax - xmin, ymax - ymin]) 96 | return np.array(dataset) 97 | 98 | 99 | if __name__ == '__main__': 100 | 101 | ANNOTATIONS_PATH = r"I:\Dataset\datasets1\VOC2007\Annotations" 102 | #"/home/dongpeijie/datasets/voc2007_for_yolo_torch-master/Annotations" # folder that contains the xml files 103 | CLUSTERS = 9 # number of clusters = number of anchors 104 | INPUTDIM = 416 # network input size 105 | 106 | data = load_dataset(ANNOTATIONS_PATH) 107 | 108 | out = kmeans(data, k=CLUSTERS) 109 | 110 | print('Boxes:') 111 | # print(np.array(out) * INPUTDIM) 112 | 113 | anchors = np.array(out)*INPUTDIM 114 | 115 | anchors = np.sort(anchors, axis=0) 116 | 117 | print("=================") 118 | for i in range(len(anchors)): 119 | print("%.2f,%.2f, " % (anchors[i][0], anchors[i][1]), end="") 120 | 121 | print("\n=================") 122 | 123 | print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100)) 124 | final_anchors = np.around(out[:, 0] / out[:, 1], decimals=2).tolist() 125 | print("Before Sort Ratios:\n {}".format(final_anchors)) 126 | print("After Sort Ratios:\n {}".format(sorted(final_anchors))) -------------------------------------------------------------------------------- /utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | # pip install --upgrade google-cloud-storage 3 | 4 | import os 5 | import time 6 | 7 | 8 | # from google.cloud import storage 9 | 10 | 11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'): 12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f 13 | # Downloads a file from Google Drive, accepting presented query 14 | # from utils.google_utils import *; gdrive_download() 15 | t = time.time() 16 | 17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... 
' % (id, name), end='') 18 | if os.path.exists(name): # remove existing 19 | os.remove(name) 20 | 21 | # Attempt large file download 22 | s = ["curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id, 23 | "curl -Lb ./cookie -s \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % ( 24 | id, name), 25 | 'rm ./cookie'] 26 | r = sum([os.system(x) for x in s]) # run commands and accumulate the return codes (0 on success) 27 | 28 | # Attempt small file download 29 | if not os.path.exists(name): # file size < 40MB 30 | s = 'curl -f -L -o %s https://drive.google.com/uc?export=download&id=%s' % (name, id) 31 | r = os.system(s) 32 | 33 | # Error check 34 | if r != 0: 35 | os.system('rm ' + name) # remove partial downloads 36 | print('ERROR: Download failure ') 37 | return r 38 | 39 | # Unzip if archive 40 | if name.endswith('.zip'): 41 | print('unzipping... ', end='') 42 | os.system('unzip -q %s' % name) # unzip 43 | os.remove(name) # remove zip to free space 44 | 45 | print('Done (%.1fs)' % (time.time() - t)) 46 | return r 47 | 48 | 49 | def upload_blob(bucket_name, source_file_name, destination_blob_name): 50 | # Uploads a file to a bucket 51 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 52 | 53 | storage_client = storage.Client() 54 | bucket = storage_client.get_bucket(bucket_name) 55 | blob = bucket.blob(destination_blob_name) 56 | 57 | blob.upload_from_filename(source_file_name) 58 | 59 | print('File {} uploaded to {}.'.format( 60 | source_file_name, 61 | destination_blob_name)) 62 | 63 | 64 | def download_blob(bucket_name, source_blob_name, destination_file_name): 65 | # Downloads a blob from a bucket 66 | storage_client = storage.Client() 67 | bucket = storage_client.get_bucket(bucket_name) 68 | blob = bucket.blob(source_blob_name) 69 | 70 | blob.download_to_filename(destination_file_name) 71 | 72 | print('Blob {} downloaded to {}.'.format( 73 | source_blob_name, 74 | destination_file_name)) 75 | -------------------------------------------------------------------------------- /utils/layers.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | ''' 5 | The values given in shape must be odd, so that padding = (shape - 1) // 2 keeps the feature map size unchanged. 6 | 7 | [spatialmaxpool] 8 | # 52x52 26x26 13x13 9 | from=75, 70, 62 10 | shape=13, 13, 13 11 | out_plane = 128 12 | ''' 13 | class SpatialMaxpool(nn.Module): 14 | def __init__(self, shapes, filters, out_plane=128): 15 | # shapes: type=list 16 | # filters: type=list 17 | super(SpatialMaxpool, self).__init__() 18 | 19 | self.spp1 = nn.MaxPool2d( # 52 20 | kernel_size=shapes[0], 21 | stride=1, 22 | padding=int((shapes[0] - 1) // 2)) 23 | self.conv1x1_1 = nn.Conv2d(filters[0], out_plane, kernel_size=3, # 3x3 stride-2 conv: downsamples 52x52 to 26x26 to match the other branches 24 | stride=2, 25 | padding=1) 26 | 27 | self.spp2 = nn.MaxPool2d( # 26 28 | kernel_size=shapes[1], 29 | stride=1, 30 | padding=int((shapes[1] - 1) // 2)) 31 | self.conv1x1_2 = nn.Conv2d(filters[1], out_plane, kernel_size=1, 32 | stride=1, 33 | padding=0) 34 | 35 | self.spp3 = nn.MaxPool2d( # 13 36 | kernel_size=shapes[2], 37 | stride=1, 38 | padding=int((shapes[2] - 1) // 2)) 39 | self.conv1x1_3 = nn.Conv2d(filters[2], out_plane, kernel_size=1, 40 | stride=1, 41 | padding=0) 42 | 43 | self.us_spp3 = nn.Upsample(scale_factor=2, mode='nearest') 44 | 45 | def forward(self, x1, x2, x3): 46 | # x1: 52x52, x2: 26x26, x3: 13x13 47 | out1 = self.conv1x1_1(self.spp1(x1)) 48 | out2 = self.conv1x1_2(self.spp2(x2)) 49 | out3 = self.us_spp3(self.conv1x1_3(self.spp3(x3))) 50 | return 
out1+out2+out3 51 | 52 | 53 | ''' 54 | Not a standard SE block, but a special SE variant. 55 | # layer=80 56 | [se] 57 | # attention feature 58 | from=62, -1 59 | reduction=4 60 | out_plane=256 # must stay consistent with the value used above 61 | ''' 62 | 63 | class SpecialSE(nn.Module): 64 | def __init__(self, in_plane, out_plane, reduction=4): 65 | super(SpecialSE, self).__init__() 66 | self.out_plane = out_plane 67 | self.gap = nn.AdaptiveAvgPool2d(1) 68 | self.fc = nn.Sequential( 69 | nn.Linear(in_plane, in_plane // reduction, bias=False), # use the reduction argument instead of a hard-coded 4 70 | nn.ReLU(inplace=True), 71 | nn.Linear(in_plane // reduction, out_plane, bias=False), 72 | nn.Sigmoid() 73 | ) 74 | 75 | def forward(self, attention, y): 76 | # apply the attention extracted from x to y 77 | b, c, _, _ = attention.size() 78 | attention = self.gap(attention).view(b, c) 79 | channel_attention = self.fc(attention).view(b, self.out_plane, 1, 1) 80 | return channel_attention * y 81 | 82 | 83 | if __name__ == "__main__": 84 | model=SpatialMaxpool(shapes=[13, 13, 13], filters=[128, 128, 512],out_plane=256) 85 | 86 | x1 = torch.zeros((3, 128, 52, 52)) 87 | x2 = torch.zeros((3, 128, 26, 26)) 88 | x3 = torch.zeros((3, 512, 13, 13)) 89 | 90 | print(model(x1,x2,x3).shape) 91 | 92 | # # attention, feature 93 | # model = SpecialSE(512, 256, reduction=4) 94 | 95 | # x1 = torch.zeros(4, 512, 13, 13) 96 | # y1 = torch.zeros(4, 256, 26, 26) 97 | 98 | # # attention, feature 99 | # print(model(x1, y1).shape) 100 | -------------------------------------------------------------------------------- /utils/parse_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | 5 | 6 | def parse_model_cfg(path): 7 | # Parse the yolo *.cfg file and return module definitions. path may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3' 8 | if not path.endswith('.cfg'): # add .cfg suffix if omitted 9 | path += '.cfg' 10 | # add cfg/ prefix if omitted 11 | if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path): 12 | path = 'cfg' + os.sep + path 13 | 14 | with open(path, 'r') as f: 15 | lines = f.read().split('\n') 16 | lines = [x for x in lines if x and not x.startswith('#')] 17 | lines = [x.rstrip().lstrip() 18 | for x in lines] # get rid of fringe whitespaces 19 | mdefs = [] # module definitions 20 | for line in lines: 21 | if line.startswith('['): # This marks the start of a new block 22 | mdefs.append({}) 23 | mdefs[-1]['type'] = line[1:-1].rstrip() 24 | if mdefs[-1]['type'] == 'convolutional': 25 | # pre-populate with zeros (may be overwritten later) 26 | mdefs[-1]['batch_normalize'] = 0 27 | else: 28 | key, val = line.split("=") 29 | key = key.rstrip() 30 | 31 | if 'anchors' in key: 32 | # np anchors 33 | mdefs[-1][key] = np.array([float(x) 34 | for x in val.split(',')]).reshape((-1, 2)) 35 | else: 36 | mdefs[-1][key] = val.strip() 37 | 38 | # Check all fields are supported 39 | supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups', 40 | 'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random', 41 | 'stride_x', 'stride_y', 'reduction', 'out_plane', 'shape'] 42 | 43 | f = [] # fields 44 | for x in mdefs[1:]: 45 | [f.append(k) for k in x if k not in f] 46 | u = [x for x in f if x not in supported] # unsupported fields 47 | assert not any( 48 | u), "Unsupported fields %s in %s. 
See https://github.com/ultralytics/yolov3/issues/631" % (u, path) 49 | 50 | return mdefs 51 | 52 | 53 | def parse_data_cfg(path): 54 | # Parses the data configuration file 55 | # add data/ prefix if omitted 56 | if not os.path.exists(path) and os.path.exists('data' + os.sep + path): 57 | path = 'data' + os.sep + path 58 | 59 | with open(path, 'r') as f: 60 | lines = f.readlines() 61 | 62 | options = dict() 63 | for line in lines: 64 | line = line.strip() 65 | if line == '' or line.startswith('#'): 66 | continue 67 | key, val = line.split('=') 68 | options[key.strip()] = val.strip() 69 | 70 | return options 71 | -------------------------------------------------------------------------------- /utils/process_darklabel.py: -------------------------------------------------------------------------------- 1 | import os 2 | ''' 3 | gt.txt: 4 | --------- 5 | frame (counted from 1), id, box (left, top, w, h), ignore=1 (do not ignore), class=1 (counted from 1), visibility 6 | 1,1,1363,569,103,241,1,1,0.86014 7 | 2,1,1362,568,103,241,1,1,0.86173 8 | 3,1,1362,568,103,241,1,1,0.86173 9 | 4,1,1362,568,103,241,1,1,0.86173 10 | 11 | cutout24_gt.txt 12 | --- 13 | frame (counted from 0), number of objects, id (counted from 0), box (x1, y1, x2, y2), class=null 14 | 0,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 15 | 1,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 16 | 2,4,0,450,194,558,276,null,1,408,147,469,206,null,2,374,199,435,307,null,3,153,213,218,314,null 17 | ''' 18 | 19 | 20 | def xyxy2xywh(x): 21 | # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] 22 | # y = torch.zeros_like(x) if isinstance(x, 23 | # torch.Tensor) else np.zeros_like(x) 24 | y = [0, 0, 0, 0] 25 | 26 | y[0] = (x[0] + x[2]) / 2 27 | y[1] = (x[1] + x[3]) / 2 28 | y[2] = x[2] - x[0] 29 | y[3] = x[3] - x[1] 30 | return y 31 | 32 | def process_darklabel(video_label_path, mot_label_path): 33 | f = open(video_label_path, "r") 34 | f_o = open(mot_label_path, "w") 35 | 36 | contents = f.readlines() 37 | 38 | for line in contents: 39 | line = line[:-1] 40 | num_list = [num for num in line.split(',')] 41 | 42 | frame_id = int(num_list[0]) + 1 43 | total_num = int(num_list[1]) 44 | 45 | base = 2 46 | 47 | for i in range(total_num): 48 | 49 | print(base, base + i * 6, base + i * 6 + 4) 50 | 51 | _id = int(num_list[base + i * 6]) + 1 52 | _box_x1 = int(num_list[base + i * 6 + 1]) 53 | _box_y1 = int(num_list[base + i * 6 + 2]) 54 | _box_x2 = int(num_list[base + i * 6 + 3]) 55 | _box_y2 = int(num_list[base + i * 6 + 4]) 56 | 57 | y = xyxy2xywh([_box_x1, _box_y1, _box_x2, _box_y2]) 58 | 59 | write_line = "%d,%d,%d,%d,%d,%d,1,1,1\n" % (frame_id, _id, y[0], 60 | y[1], y[2], y[3]) 61 | 62 | f_o.write(write_line) 63 | 64 | f.close() 65 | f_o.close() 66 | 67 | if __name__ == "__main__": 68 | root_dir = "./data/videosample" 69 | 70 | for item in os.listdir(root_dir): 71 | full_path = os.path.join(root_dir, item) 72 | 73 | video_path = os.path.join(full_path, item+".mp4") 74 | video_label_path = os.path.join(full_path, item + "_gt.txt") 75 | mot_label_path = os.path.join(full_path, "gt.txt") 76 | process_darklabel(video_label_path, mot_label_path) 77 | -------------------------------------------------------------------------------- /utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | 5 | 6 | def init_seeds(seed=0): 7 | torch.manual_seed(seed) 8 | torch.cuda.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | # Remove 
randomness (may be slower on Tesla GPUs) # https://pytorch.org/docs/stable/notes/randomness.html 12 | if seed == 0: 13 | torch.backends.cudnn.deterministic = True 14 | torch.backends.cudnn.benchmark = False 15 | 16 | 17 | def select_device(device='', apex=False, batch_size=None): 18 | # device = 'cpu' or '0' or '0,1,2,3' 19 | cpu_request = device.lower() == 'cpu' 20 | if device and not cpu_request: # if device requested other than 'cpu' 21 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 22 | assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device # check availablity 23 | 24 | cuda = False if cpu_request else torch.cuda.is_available() 25 | if cuda: 26 | c = 1024 ** 2 # bytes to MB 27 | ng = torch.cuda.device_count() 28 | if ng > 1 and batch_size: # check that batch_size is compatible with device_count 29 | assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng) 30 | x = [torch.cuda.get_device_properties(i) for i in range(ng)] 31 | s = 'Using CUDA ' + ('Apex ' if apex else '') # apex for mixed precision https://github.com/NVIDIA/apex 32 | for i in range(0, ng): 33 | if i == 1: 34 | s = ' ' * len(s) 35 | print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" % 36 | (s, i, x[i].name, x[i].total_memory / c)) 37 | else: 38 | print('Using CPU') 39 | 40 | print('') # skip a line 41 | return torch.device('cuda:0' if cuda else 'cpu') 42 | 43 | 44 | def fuse_conv_and_bn(conv, bn): 45 | # https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 46 | with torch.no_grad(): 47 | # init 48 | fusedconv = torch.nn.Conv2d(conv.in_channels, 49 | conv.out_channels, 50 | kernel_size=conv.kernel_size, 51 | stride=conv.stride, 52 | padding=conv.padding, 53 | bias=True) 54 | 55 | # prepare filters 56 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 57 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 58 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size())) 59 | 60 | # prepare spatial bias 61 | if conv.bias is not None: 62 | b_conv = conv.bias 63 | else: 64 | b_conv = torch.zeros(conv.weight.size(0)) 65 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 66 | fusedconv.bias.copy_(b_conv + b_bn) 67 | 68 | return fusedconv 69 | 70 | 71 | def model_info(model, report='summary'): 72 | # Plots a line-by-line description of a PyTorch model 73 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 74 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 75 | if report is 'full': 76 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 77 | for i, (name, p) in enumerate(model.named_parameters()): 78 | name = name.replace('module_list.', '') 79 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 80 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 81 | print('Model Summary: %g layers, %g parameters, %g gradients' % (len(list(model.parameters())), n_p, n_g)) 82 | 83 | 84 | def load_classifier(name='resnet101', n=2): 85 | # Loads a pretrained model reshaped to n-class output 86 | import pretrainedmodels # https://github.com/Cadene/pretrained-models.pytorch#torchvision 87 | model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet') 88 | 89 | # Display model properties 90 | for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']: 91 
| print(x + ' =', eval(x)) 92 | 93 | # Reshape output to n classes 94 | filters = model.last_linear.weight.shape[1] 95 | model.last_linear.bias = torch.nn.Parameter(torch.zeros(n)) 96 | model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters)) 97 | model.last_linear.out_features = n 98 | return model 99 | 100 | 101 | from collections import defaultdict 102 | from torch.optim import Optimizer 103 | 104 | 105 | class Lookahead(Optimizer): 106 | def __init__(self, optimizer, k=5, alpha=0.5): 107 | self.optimizer = optimizer 108 | self.k = k 109 | self.alpha = alpha 110 | self.param_groups = self.optimizer.param_groups 111 | self.state = defaultdict(dict) 112 | self.fast_state = self.optimizer.state 113 | for group in self.param_groups: 114 | group["counter"] = 0 115 | 116 | def update(self, group): 117 | for fast in group["params"]: 118 | param_state = self.state[fast] 119 | if "slow_param" not in param_state: 120 | param_state["slow_param"] = torch.zeros_like(fast.data) 121 | param_state["slow_param"].copy_(fast.data) 122 | slow = param_state["slow_param"] 123 | slow += (fast.data - slow) * self.alpha 124 | fast.data.copy_(slow) 125 | 126 | def update_lookahead(self): 127 | for group in self.param_groups: 128 | self.update(group) 129 | 130 | def step(self, closure=None): 131 | loss = self.optimizer.step(closure) 132 | for group in self.param_groups: 133 | if group["counter"] == 0: 134 | self.update(group) 135 | group["counter"] += 1 136 | if group["counter"] >= self.k: 137 | group["counter"] = 0 138 | return loss 139 | 140 | def state_dict(self): 141 | fast_state_dict = self.optimizer.state_dict() 142 | slow_state = { 143 | (id(k) if isinstance(k, torch.Tensor) else k): v 144 | for k, v in self.state.items() 145 | } 146 | fast_state = fast_state_dict["state"] 147 | param_groups = fast_state_dict["param_groups"] 148 | return { 149 | "fast_state": fast_state, 150 | "slow_state": slow_state, 151 | "param_groups": param_groups, 152 | } 153 | 154 | def load_state_dict(self, state_dict): 155 | slow_state_dict = { 156 | "state": state_dict["slow_state"], 157 | "param_groups": state_dict["param_groups"], 158 | } 159 | fast_state_dict = { 160 | "state": state_dict["fast_state"], 161 | "param_groups": state_dict["param_groups"], 162 | } 163 | super(Lookahead, self).load_state_dict(slow_state_dict) 164 | self.optimizer.load_state_dict(fast_state_dict) 165 | self.fast_state = self.optimizer.state 166 | 167 | def add_param_group(self, param_group): 168 | param_group["counter"] = 0 169 | self.optimizer.add_param_group(param_group) 170 | -------------------------------------------------------------------------------- /utils/tsne_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import argparse 4 | import numpy as np 5 | import matplotlib.pyplot as plt 6 | from matplotlib import offsetbox 7 | from sklearn import (manifold, datasets, decomposition, ensemble, 8 | discriminant_analysis, random_projection, neighbors) 9 | 10 | 11 | class tSNE_Visual(): 12 | def __init__(self): 13 | super(tSNE_Visual, self).__init__() 14 | self.parser = argparse.ArgumentParser() 15 | self.parser.add_argument('--Input', 16 | type=str, 17 | default='data', 18 | help='the path of target dataset') 19 | self.parser.add_argument('--Size', 20 | type=int, 21 | default=400, 22 | help='the size of every class') 23 | self.parser.add_argument('--Zoom', 24 | type=float, 25 | default=0.1, 26 | help='the size of every class') 27 | 
self.parser.add_argument('--Output', 28 | type=str, 29 | default='t-SNE1.png', 30 | help='the out path of result image') 31 | 32 | def parse(self): 33 | self.opt = self.parser.parse_args() 34 | args = vars(self.opt) 35 | print('\n--- load options ---') 36 | for name, value in sorted(args.items()): 37 | print('%s: %s' % (str(name), str(value))) 38 | return self.opt 39 | 40 | def plot_embedding(self, X, _output, zoom, title=None): 41 | x_min, x_max = np.min(X, 0), np.max(X, 0) 42 | X = (X - x_min) / (x_max - x_min) 43 | 44 | plt.figure(figsize=(20, 20)) 45 | ax = plt.subplot(111) 46 | 47 | if hasattr(offsetbox, 'AnnotationBbox'): 48 | # only print thumbnails with matplotlib > 1.0 49 | shown_images = np.array([[1., 1.]]) # just something big 50 | for i in range(X.shape[0]): 51 | dist = np.sum((X[i] - shown_images)**2, 1) 52 | #if np.min(dist) < 4e-3: 53 | # don't show points that are too close 54 | # continue 55 | shown_images = np.r_[shown_images, [X[i]]] 56 | imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage( 57 | real_imgs[i], zoom=0.12, cmap=plt.cm.gray_r), 58 | X[i], 59 | pad=0) 60 | ax.add_artist(imagebox) 61 | '''for i in range(X.shape[0]): 62 | #cls = plt.text(X[i, 0], X[i, 1], _classes[y[i][0].astype(int)-1], 63 | cls = plt.text(X[i, 0], X[i, 1], str(y[i].astype(int)), 64 | #cls = plt.text(X[i, 0], X[i, 1], '★', 65 | color=_colors[int(y[i][0]-1)], 66 | fontdict={'weight': 'bold', 'size': 12}) 67 | cls.set_zorder(20) ''' 68 | 69 | ax.spines['top'].set_visible(False) 70 | ax.spines['right'].set_visible(False) 71 | ax.spines['bottom'].set_visible(False) 72 | ax.spines['left'].set_visible(False) 73 | plt.xticks([]), plt.yticks([]) 74 | if title is not None: 75 | plt.title(title) 76 | plt.savefig(_output) 77 | 78 | 79 | if __name__ == '__main__': 80 | # Disable the GUI matplotlib 81 | plt.switch_backend('agg') 82 | 83 | tsne_visual = tSNE_Visual() 84 | opts = tsne_visual.parse() 85 | dataroot = opts.Input 86 | _size = opts.Size 87 | _output = opts.Output 88 | _zoom = opts.Zoom 89 | 90 | dirs = [] 91 | for item in os.listdir(dataroot): 92 | if ('.ipynb_checkpoints' not in item): 93 | dirs.append(item) 94 | 95 | _len = len(dirs) 96 | y = np.zeros((_size * _len, 1)) 97 | for i in range(_len): 98 | y[i * _size:(i + 1) * _size] = i + 1 99 | 100 | imgs = [] 101 | real_imgs = [] 102 | for i in range(_len): 103 | single_cls = [] 104 | path = os.path.join(dataroot, dirs[i]) 105 | dataset_list = os.listdir(path) 106 | cnt = 0 107 | for item in dataset_list: 108 | if (cnt == _size): 109 | break 110 | if ('.ipynb_checkpoints' in item): 111 | continue 112 | data_path = os.path.join(path, item) 113 | temp = cv2.imread(data_path) 114 | real_img = cv2.cvtColor(temp, cv2.COLOR_BGR2RGB) 115 | imgs.append(temp.reshape(-1)) 116 | real_imgs.append(real_img) 117 | cnt = cnt + 1 118 | np_imgs = np.array(imgs) 119 | real_imgs = np.array(real_imgs) 120 | 121 | tsne = manifold.TSNE(n_components=2, init='random', random_state=0) 122 | print(np_imgs.shape) 123 | result = tsne.fit_transform(np_imgs) 124 | 125 | tsne_visual.plot_embedding(X=result, _output=_output, zoom=_zoom) -------------------------------------------------------------------------------- /utils/utils_sort.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | COLORS_10 =[(144,238,144),(178, 34, 34),(221,160,221),( 0,255, 0),( 0,128, 0),(210,105, 30),(220, 20, 60), 5 | (192,192,192),(255,228,196),( 50,205, 50),(139, 0,139),(100,149,237),(138, 43,226),(238,130,238), 6 
| (255, 0,255),( 0,100, 0),(127,255, 0),(255, 0,255),( 0, 0,205),(255,140, 0),(255,239,213), 7 | (199, 21,133),(124,252, 0),(147,112,219),(106, 90,205),(176,196,222),( 65,105,225),(173,255, 47), 8 | (255, 20,147),(219,112,147),(186, 85,211),(199, 21,133),(148, 0,211),(255, 99, 71),(144,238,144), 9 | (255,255, 0),(230,230,250),( 0, 0,255),(128,128, 0),(189,183,107),(255,255,224),(128,128,128), 10 | (105,105,105),( 64,224,208),(205,133, 63),( 0,128,128),( 72,209,204),(139, 69, 19),(255,245,238), 11 | (250,240,230),(152,251,152),( 0,255,255),(135,206,235),( 0,191,255),(176,224,230),( 0,250,154), 12 | (245,255,250),(240,230,140),(245,222,179),( 0,139,139),(143,188,143),(255, 0, 0),(240,128,128), 13 | (102,205,170),( 60,179,113),( 46,139, 87),(165, 42, 42),(178, 34, 34),(175,238,238),(255,248,220), 14 | (218,165, 32),(255,250,240),(253,245,230),(244,164, 96),(210,105, 30)] 15 | 16 | 17 | # def draw_bbox(img, box, cls_name, identity=None, offset=(0,0)): 18 | # ''' 19 | # draw box of an id 20 | # ''' 21 | # x1,y1,x2,y2 = [int(i+offset[idx%2]) for idx,i in enumerate(box)] 22 | # # set color and label text 23 | # color = COLORS_10[identity%len(COLORS_10)] if identity is not None else COLORS_10[0] 24 | # label = '{} {}'.format(cls_name, identity) 25 | # # box text and bar 26 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0] 27 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,2) 28 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 29 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 1, [255,255,255], 1) 30 | # return img 31 | 32 | 33 | def plot_one_box(x, ori_img, color=None, label=None, line_thickness=None): 34 | # Plots one bounding box on image img 35 | img = ori_img 36 | tl = line_thickness or round( 37 | 0.002 * max(img.shape[0:2])) + 1 # line thickness 38 | color = color or [int(np.random.randint(0, 255)) for _ in range(3)] # use np.random: the random module is not imported in this file 39 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 40 | cv2.rectangle(img, c1, c2, color, thickness=tl) 41 | if label: 42 | tf = max(tl - 1, 1) # font thickness 43 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] 44 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 45 | cv2.rectangle(img, c1, c2, color, -1) # filled 46 | cv2.putText(img, 47 | label, (c1[0], c1[1] - 2), 48 | 0, 49 | tl / 3, [225, 255, 255], 50 | thickness=tf, 51 | lineType=cv2.LINE_AA) 52 | return img 53 | 54 | 55 | ''' 56 | Drawing helper used by Deep SORT: boxes are drawn directly on the original image. 57 | ''' 58 | def draw_bboxes(ori_img, bbox, identities=None, offset=(0,0)): 59 | img = ori_img 60 | for i,box in enumerate(bbox): 61 | x1,y1,x2,y2 = [int(i) for i in box] 62 | x1 += offset[0] 63 | x2 += offset[0] 64 | y1 += offset[1] 65 | y2 += offset[1] 66 | # box text and bar 67 | id = int(identities[i]) if identities is not None else 0 68 | color = COLORS_10[id%len(COLORS_10)] 69 | label = '{}{:d}'.format("", id) 70 | # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] 71 | img = plot_one_box([x1,y1,x2,y2], img, color, label) 72 | # cv2.rectangle(img,(x1, y1),(x2,y2),color,3) 73 | # cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1) 74 | # cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2) 75 | return img 76 | 77 | 78 | 79 | 80 | 81 | def softmax(x): 82 | assert isinstance(x, np.ndarray), "expect x to be a numpy array" 83 | x_exp = np.exp(x*5) 84 | return x_exp/x_exp.sum() 85 | 86 | def softmin(x): 87 | assert isinstance(x, np.ndarray), "expect x to be a numpy array" 88 | x_exp = np.exp(-x) 89 | return 
x_exp/x_exp.sum() 90 | 91 | 92 | 93 | if __name__ == '__main__': 94 | x = np.arange(10)/10. 95 | x = np.array([0.5,0.5,0.5,0.6,1.]) 96 | y = softmax(x) 97 | z = softmin(x) 98 | import ipdb; ipdb.set_trace() -------------------------------------------------------------------------------- /utils/visdom.py: -------------------------------------------------------------------------------- 1 | import visdom 2 | import time 3 | import numpy as np 4 | 5 | 6 | class Visualizer(object): 7 | def __init__(self, env='default', **kwargs): 8 | self.vis = visdom.Visdom(env=env, **kwargs) 9 | self.index = {} 10 | 11 | def plot_many_stack(self, d): 12 | ''' 13 | self.plot('loss',1.00) 14 | ''' 15 | name = list(d.keys()) 16 | name_total = " ".join(name) 17 | x = self.index.get(name_total, 0) 18 | val = list(d.values()) 19 | if len(val) == 1: 20 | y = np.array(val) 21 | else: 22 | y = np.array(val).reshape(-1, len(val)) 23 | # print(x) 24 | self.vis.line( 25 | Y=y, 26 | X=np.ones(y.shape) * x, 27 | win=str(name_total), # unicode 28 | opts=dict(legend=name, title=name_total), 29 | update=None if x == 0 else 'append') 30 | self.index[name_total] = x + 1 -------------------------------------------------------------------------------- /weights/download_yolov3_weights.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make '/weights' directory if it does not exist and cd into it 4 | # mkdir -p weights && cd weights 5 | 6 | # copy darknet weight files, continue '-c' if partially downloaded 7 | # wget -c https://pjreddie.com/media/files/yolov3.weights 8 | # wget -c https://pjreddie.com/media/files/yolov3-tiny.weights 9 | # wget -c https://pjreddie.com/media/files/yolov3-spp.weights 10 | 11 | # yolov3 pytorch weights 12 | # download from Google Drive: https://drive.google.com/drive/folders/1uxgUBemJVw9wZsdpboYbzUN4bcRhsuAI 13 | 14 | # darknet53 weights (first 75 layers only) 15 | # wget -c https://pjreddie.com/media/files/darknet53.conv.74 16 | 17 | # yolov3-tiny weights from darknet (first 16 layers only) 18 | # ./darknet partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 19 | # mv yolov3-tiny.conv.15 ../ 20 | 21 | # new method 22 | python3 -c "from models import *; 23 | attempt_download('weights/yolov3.pt'); 24 | attempt_download('weights/yolov3-spp.pt')" 25 | -------------------------------------------------------------------------------- /weights/gcp.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # New VM 4 | rm -rf yolov3 weights coco 5 | git clone https://github.com/ultralytics/yolov3 6 | bash yolov3/weights/download_yolov3_weights.sh && cp -r weights yolov3 7 | bash yolov3/data/get_coco_dataset.sh 8 | git clone https://github.com/cocodataset/cocoapi && cd cocoapi/PythonAPI && make && cd ../.. 
&& cp -r cocoapi/PythonAPI/pycocotools yolov3 9 | sudo reboot now 10 | 11 | # Re-clone 12 | rm -rf yolov3 13 | git clone https://github.com/ultralytics/yolov3 # master 14 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 15 | cp -r weights yolov3 16 | cp -r cocoapi/PythonAPI/pycocotools yolov3 17 | cd yolov3 18 | 19 | # Train 20 | python3 train.py 21 | 22 | # Resume 23 | python3 train.py --resume 24 | 25 | # Detect 26 | python3 detect.py 27 | 28 | # Test 29 | python3 test.py --save-json 30 | 31 | # Git pull 32 | git pull https://github.com/ultralytics/yolov3 # master 33 | git pull https://github.com/ultralytics/yolov3 test # branch 34 | 35 | # Test Darknet training 36 | python3 test.py --weights ../darknet/backup/yolov3.backup 37 | 38 | # Copy latest.pt TO bucket 39 | gsutil cp yolov3/weights/latest1gpu.pt gs://ultralytics 40 | 41 | # Copy latest.pt FROM bucket 42 | gsutil cp gs://ultralytics/latest.pt yolov3/weights/latest.pt 43 | wget https://storage.googleapis.com/ultralytics/yolov3/latest_v1_0.pt -O weights/latest_v1_0.pt 44 | wget https://storage.googleapis.com/ultralytics/yolov3/best_v1_0.pt -O weights/best_v1_0.pt 45 | 46 | # Reproduce tutorials 47 | rm results*.txt # WARNING: removes existing results 48 | python3 train.py --nosave --data data/coco_1img.data && mv results.txt results3_1img.txt 49 | python3 train.py --nosave --data data/coco_10img.data && mv results.txt results3_10img.txt 50 | python3 train.py --nosave --data data/coco_100img.data && mv results.txt results4_100img.txt 51 | python3 train.py --nosave --data data/coco_100img.data --transfer && mv results.txt results3_100imgTL.txt 52 | python3 -c "from utils import utils; utils.plot_results()" 53 | gsutil cp results*.txt gs://ultralytics 54 | gsutil cp results.png gs://ultralytics 55 | sudo shutdown 56 | 57 | # Unit tests 58 | rm -rf yolov3 59 | git clone https://github.com/ultralytics/yolov3 # master 60 | cp -r weights yolov3 && cd yolov3 61 | python3 detect.py # detect 62 | python3 test.py --data data/coco_32img.data # test 63 | python3 train.py --data data/coco_32img.data --epochs 5 --nosave # train 64 | 65 | # Debug/Development 66 | rm -rf yolov3 67 | git clone https://github.com/ultralytics/yolov3 # master 68 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 yolov3_test # branch 69 | cp -r cocoapi/PythonAPI/pycocotools yolov3 70 | cp -r weights yolov3 && cd yolov3 71 | python3 train.py --evolve --data data/coco_100img.data --num-workers 2 --epochs 30 72 | gsutil cp evolve.txt gs://ultralytics 73 | sudo shutdown 74 | --------------------------------------------------------------------------------