├── DCNv2-pytorch_1.9.zip
├── LICENSE
├── README.md
├── README_en.md
├── cython_bbox-0.1.3.tar.gz
├── docs
│ ├── FairMOT.png
│ ├── Interface.png
│ ├── MOT20-01.gif
│ ├── Making_Introduction_cn.md
│ └── Making_Introduction_en.md
├── interface.py
├── lib
├── DCNv2
│ ├── .gitignore
│ ├── LICENSE
│ ├── README.md
│ ├── __init__.py
│ ├── _ext.cp38-win_amd64.pyd
│ ├── dcn_v2.py
│ ├── make.sh
│ ├── setup.py
│ ├── src
│ │ ├── cpu
│ │ │ ├── dcn_v2_cpu.cpp
│ │ │ ├── dcn_v2_im2col_cpu.cpp
│ │ │ ├── dcn_v2_im2col_cpu.h
│ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda
│ │ │ ├── dcn_v2_cuda.cu
│ │ │ ├── dcn_v2_im2col_cuda.cu
│ │ │ ├── dcn_v2_im2col_cuda.h
│ │ │ ├── dcn_v2_psroi_pooling_cuda.cu
│ │ │ └── vision.h
│ │ ├── dcn_v2.h
│ │ └── vision.cpp
│ └── test
│ │ ├── test.py
│ │ ├── testcpu.py
│ │ └── testcuda.py
├── cfg
│ ├── crowdhuman.json
│ ├── data.json
│ ├── data_all.json
│ ├── data_half.json
│ ├── mot15.json
│ ├── mot16.json
│ ├── mot17.json
│ ├── mot17_half.json
│ └── mot20.json
├── datasets
│ ├── dataset
│ │ ├── __pycache__
│ │ │ ├── jde.cpython-37.pyc
│ │ │ └── jde.cpython-38.pyc
│ │ ├── jde.py
│ │ └── jde_yolov5.py
│ └── dataset_factory.py
├── logger.py
├── models
│ ├── __pycache__
│ │ ├── common.cpython-38.pyc
│ │ ├── decode.cpython-38.pyc
│ │ ├── model.cpython-38.pyc
│ │ ├── utils.cpython-38.pyc
│ │ └── yolo.cpython-38.pyc
│ ├── common.py
│ ├── data_parallel.py
│ ├── decode.py
│ ├── losses.py
│ ├── model.py
│ ├── networks
│ │ ├── __pycache__
│ │ │ ├── dlav0.cpython-38.pyc
│ │ │ ├── pose_dla_conv.cpython-38.pyc
│ │ │ ├── pose_dla_dcn.cpython-38.pyc
│ │ │ ├── pose_hrnet.cpython-38.pyc
│ │ │ ├── resnet_dcn.cpython-38.pyc
│ │ │ └── resnet_fpn_dcn.cpython-38.pyc
│ │ ├── config
│ │ │ ├── __init__.py
│ │ │ ├── __pycache__
│ │ │ │ ├── __init__.cpython-38.pyc
│ │ │ │ └── default.cpython-38.pyc
│ │ │ ├── default.py
│ │ │ ├── hrnet_w18.yaml
│ │ │ ├── hrnet_w32.yaml
│ │ │ └── yolov5s.yaml
│ │ ├── dlav0.py
│ │ ├── pose_dla_conv.py
│ │ ├── pose_dla_dcn.py
│ │ ├── pose_hrnet.py
│ │ ├── resnet_dcn.py
│ │ └── resnet_fpn_dcn.py
│ ├── scatter_gather.py
│ ├── utils.py
│ └── yolo.py
├── opts.py
├── tracker
│ ├── __pycache__
│ │ ├── basetrack.cpython-38.pyc
│ │ ├── matching.cpython-38.pyc
│ │ └── multitracker.cpython-38.pyc
│ ├── basetrack.py
│ ├── matching.py
│ └── multitracker.py
├── tracking_utils
│ ├── __pycache__
│ │ ├── evaluation.cpython-38.pyc
│ │ ├── io.cpython-38.pyc
│ │ ├── kalman_filter.cpython-38.pyc
│ │ ├── log.cpython-38.pyc
│ │ ├── timer.cpython-38.pyc
│ │ ├── utils.cpython-38.pyc
│ │ └── visualization.cpython-38.pyc
│ ├── evaluation.py
│ ├── io.py
│ ├── kalman_filter.py
│ ├── log.py
│ ├── nms.py
│ ├── parse_config.py
│ ├── timer.py
│ ├── utils.py
│ └── visualization.py
├── trains
│ ├── base_trainer.py
│ ├── mot.py
│ └── train_factory.py
└── utils
│ ├── __pycache__
│ │ ├── image.cpython-38.pyc
│ │ ├── post_process.cpython-38.pyc
│ │ └── utils.cpython-38.pyc
│ ├── image.py
│ ├── post_process.py
│ └── utils.py
├── main.py
├── requirements.txt
└── src
├── VideoTimer.py
├── built_in_camera_track.py
├── external_camera_track.py
├── tracker
├── __pycache__
│ └── _init_paths.cpython-38.pyc
├── _init_paths.py
├── demo.py
├── detect.py
├── to_track.py
├── to_track_camera.py
├── track.py
└── track_half.py
└── video_track.py
/DCNv2-pytorch_1.9.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/DCNv2-pytorch_1.9.zip
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 ReverseSacle(CLX)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 | Please follow the original author's License
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CrowdTracker-Pytorch (Single Camera)
2 |
3 | [简体中文](./README.md) | [English](./README_en.md)
4 |
5 | ## Address Navigation
6 |
7 | + [→Paddle edition](https://github.com/ReverseSacle/FairMOT-Paddle-Tracker_Basic)
8 | + [→FairMOT author (GitHub)](https://github.com/ifzhang/FairMOT)
9 |
10 | ## Preview
11 |
12 | 
13 |
14 | ## Interface Preview
15 |
16 | 
17 |
18 | ## Related Introduction
19 |
20 | + [→Making Introduction](./docs/Making_Introduction_cn.md)
21 |
22 | ## Environment Requirements
23 |
24 | + python3
25 | + opencv-python
26 | + DCNv2
27 | + Tested platform → Windows 10
28 | + Pre-configured conda environment (all required libraries bundled) [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIiSrFcXk8xcSq_zLD?e=e51wjQ?download=1)
29 |
30 | ## Setup and Run
31 |
32 | + `git clone "https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic.git"`
33 | + Unzip the `CrowdTracker-env` environment into the `./Anaconda3/envs/` directory
34 | + In an IDE such as PyCharm, select this `CrowdTracker-env` environment, then create a `models` folder in the project root and unzip the model-weight archive below into it
35 |
36 | ## Provided Model Weights
37 |
38 | + **Download:** provided by the original author [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIh22rxkVDfBph2VCM?e=0Tudce?download=1); by default, place it in the `models` folder in the project root
39 | + **Extra missing file:** [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIh2xS1T_M_RBKkTIf?e=iae70F?download=1); place it in `C:\Users\User name\.cache\torch\hub\checkpoints`
40 |
41 | ## Building Blocks
42 |
43 | + `PyQt5` → interface window, button group, threshold selection, GPU selection, file chooser and progress bar
44 | + `Pytorch` → deep-learning tracking system
45 | + `OpenCV` → video and camera tracking, play and pause
46 |
47 | ## Changelog
48 |
49 | 2021.11.29 Added the new branch ByteTrack-Kernel, which replaces the current tracking kernel with the ByteTrack kernel
50 |
51 | 2022.12.12 Simplified the code of both the ByteTrack and FairMOT tracking kernels, splitting each into interface, video tracking, built-in camera tracking and external camera tracking. Integrated the environment fix for the LINK2001 error.
52 |
--------------------------------------------------------------------------------
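
The "Building Blocks" list above describes a PyQt5 front end with a button group, threshold selection, GPU selection, a file chooser and a progress bar. Below is a minimal, illustrative sketch of that kind of window; every widget and method name here is an assumption for illustration only and is not taken from this repo's `interface.py`.

```python
# Minimal PyQt5 sketch of the interface pieces listed above (hypothetical names).
import sys
from PyQt5.QtCore import Qt
from PyQt5.QtWidgets import (QApplication, QFileDialog, QProgressBar,
                             QPushButton, QSlider, QVBoxLayout, QWidget)


class TrackerWindow(QWidget):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("CrowdTracker (sketch)")
        layout = QVBoxLayout(self)

        self.open_btn = QPushButton("Choose video...")   # file chooser
        self.open_btn.clicked.connect(self.choose_video)
        self.conf_slider = QSlider(Qt.Horizontal)        # detection threshold
        self.conf_slider.setRange(1, 99)
        self.conf_slider.setValue(40)                    # e.g. conf_thres = 0.40
        self.progress = QProgressBar()                   # tracking progress

        layout.addWidget(self.open_btn)
        layout.addWidget(self.conf_slider)
        layout.addWidget(self.progress)

    def choose_video(self):
        path, _ = QFileDialog.getOpenFileName(self, "Select video", "",
                                              "Videos (*.mp4 *.avi)")
        if path:
            print("selected:", path)


if __name__ == "__main__":
    app = QApplication(sys.argv)
    win = TrackerWindow()
    win.show()
    sys.exit(app.exec_())
```
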
/README_en.md:
--------------------------------------------------------------------------------
1 | # CrowdTracker-Pytorch (Single Camera)
2 |
3 | [简体中文](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/README.md) | [English](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/README_en.md)
4 |
5 | ## Address Navigation
6 |
7 | + [→Paddle edition address](https://github.com/ReverseSacle/FairMOT-Paddle-Tracker_Basic)
8 | + [→FairMot author(Github)](https://github.com/ifzhang/FairMOT)
9 |
10 | ## Preview
11 |
12 | 
13 |
14 | ## Interface Preview
15 |
16 | 
17 |
18 | ## Environment Requirements
19 |
20 | + Python3
21 | + opencv-python
22 | + DCNv2
23 | + Tested system → Windows 10
24 | + Pre-packaged conda environment (contains all required libraries) [→Google Drive](https://drive.google.com/file/d/1cOELR0lXD8oJwzMne0kx_PShylMwclBA/view?usp=sharing)
25 |
26 | ## Introduction
27 |
28 | + [→Making_Introduction](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/docs/Making_Introduction_en.md)
29 |
30 | ## Provided Model Files
31 |
32 | + **Download:** provided by the original author [→Google Drive](https://drive.google.com/file/d/1PRkK0G5-I9t63cT_YgCetKSpxQEecZ7-/view?usp=sharing); place it in the `models` folder in the root directory
33 | + **Extra missing file:** [→Google Drive](https://drive.google.com/file/d/1sZ0PHOtHkfAHpJ1Na4Ff0SD7NJktFKHq/view?usp=sharing); place it in `C:\Users\User name\.cache\torch\hub\checkpoints`
34 |
35 | ## Quick Start
36 |
37 | + `git clone "https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic.git"`
38 | + Unzip the `CrowdTracker-env` 7z archive into `./Anaconda3/envs/`
39 | + In an IDE such as PyCharm, select the `CrowdTracker-env` environment. Then create a folder named `models` in the project root and unzip the model file below into it.
40 |
41 | ## About Construction
42 |
43 | + `PyQt5` → the interface window, button group, threshold selector, GPU selector, file chooser and progress bar
44 | + `Pytorch` → the deep-learning tracking system
45 | + `OpenCV` → video/camera tracking, and playing/pausing video
46 |
47 | ## Update Record
48 |
49 | 2021.11.29 Created a new branch ByteTrack-Kernel, replacing the original MOT kernel with ByteTrack
50 |
51 | 2022.12.12 Simplified the source code of the ByteTrack and FairMOT tracking kernels, splitting each into interface, video tracking, built-in camera tracking and external camera tracking. Fixed the LINK2001 error in the provided build environment.
52 |
53 |
54 |
--------------------------------------------------------------------------------
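
The "About Construction" notes above pair OpenCV capture/playback with the PyTorch tracker. Below is a minimal, hedged sketch of that frame loop; `track_video` and `run_tracker_on_frame` are hypothetical placeholder names for illustration and are not this repo's actual functions.

```python
# Sketch of an OpenCV capture loop feeding frames to a tracker, with
# play/pause handling as described in the README. Placeholder names only.
import cv2


def run_tracker_on_frame(frame):
    # placeholder: the real project would run FairMOT/ByteTrack here
    return frame


def track_video(source="demo.mp4"):
    cap = cv2.VideoCapture(source)      # pass 0 for the built-in camera
    if not cap.isOpened():
        raise RuntimeError(f"cannot open {source}")
    paused = False
    while True:
        if not paused:
            ok, frame = cap.read()
            if not ok:
                break                    # end of stream
            vis = run_tracker_on_frame(frame)
            cv2.imshow("CrowdTracker (sketch)", vis)
        key = cv2.waitKey(1) & 0xFF
        if key == ord(" "):              # space toggles play/pause
            paused = not paused
        elif key == ord("q"):
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    track_video()
```
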
/cython_bbox-0.1.3.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/cython_bbox-0.1.3.tar.gz
--------------------------------------------------------------------------------
/docs/FairMOT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/FairMOT.png
--------------------------------------------------------------------------------
/docs/Interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/Interface.png
--------------------------------------------------------------------------------
/docs/MOT20-01.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/MOT20-01.gif
--------------------------------------------------------------------------------
/docs/Making_Introduction_cn.md:
--------------------------------------------------------------------------------
1 | Making Introduction
2 | ===
3 |
4 | Idea
5 | ---
6 |
7 | + **Detection and Re_ID**
8 |
9 | 
10 |
11 | The network unifies object detection and re-identification (Re-ID) in a single model.
12 | Previous one-stage trackers are anchor-based, so the extracted features are not aligned with object centers; for example, when two targets come close to each other, the anchor positions become inaccurate.
13 |
14 | In the figure, each target is represented as a point to improve localization accuracy. In addition, compared with the high-dimensional features traditionally used for Re-ID, low-dimensional features work better for MOT, since MOT has fewer training images than Re-ID. Learning low-dimensional features helps reduce the risk of overfitting on small datasets and improves tracking robustness.
15 |
16 | + [Paper address](https://arxiv.org/abs/2004.01888)
17 |
--------------------------------------------------------------------------------
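
The introduction above represents each target as a center point rather than an anchor box. A common way to decode such a center heatmap is CenterNet-style peak extraction with a 3x3 max-pool acting as NMS; the sketch below illustrates that idea only and is not necessarily identical to this repo's `models/decode.py`.

```python
# Illustrative center-point decoding: keep only local maxima of the heatmap
# and take the top-K peaks as object centers (generic CenterNet-style sketch).
import torch
import torch.nn.functional as F


def decode_centers(heatmap, k=100):
    """heatmap: (B, C, H, W) tensor of per-class center scores in [0, 1]."""
    b, c, h, w = heatmap.shape
    # a pixel survives only if it equals the max of its 3x3 neighbourhood
    peaks = F.max_pool2d(heatmap, kernel_size=3, stride=1, padding=1)
    heatmap = heatmap * (peaks == heatmap).float()
    # top-K scores over all classes and positions
    scores, inds = torch.topk(heatmap.view(b, -1), k)
    classes = inds // (h * w)
    ys = (inds % (h * w)) // w
    xs = (inds % (h * w)) % w
    return scores, classes, ys, xs


if __name__ == "__main__":
    hm = torch.rand(1, 1, 152, 272)   # e.g. a 608x1088 input downsampled by 4
    scores, classes, ys, xs = decode_centers(hm, k=5)
    print(scores.shape, ys[0], xs[0])
```
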
/docs/Making_Introduction_en.md:
--------------------------------------------------------------------------------
1 | Making Introduction
2 | ===
3 |
4 | Idea
5 | ---
6 |
7 | + **Detection and Embedding**
8 |
9 | 
10 |
11 | + One-shot MOT
12 | + Anchor-free detection
13 | + Learning Low-dimensional features
14 |
15 | + [Paper address](https://arxiv.org/abs/2004.01888)
16 |
17 |
--------------------------------------------------------------------------------
/lib/DCNv2/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .idea
3 | *.so
4 | *.o
5 | *pyc
6 | _ext
7 | build
8 | DCNv2.egg-info
9 | dist
--------------------------------------------------------------------------------
/lib/DCNv2/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Charles Shang
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/lib/DCNv2/README.md:
--------------------------------------------------------------------------------
1 | ## Deformable Convolutional Networks V2 with Pytorch 1.0
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 | ### Note
42 | The master branch now targets PyTorch 1.0 (the new ATen API); you can switch back to PyTorch 0.4 with:
43 | ```bash
44 | git checkout pytorch_0.4
45 | ```
46 |
47 | ### Known Issues:
48 |
49 | - [x] Gradient check w.r.t offset (solved)
50 | - [ ] Backward is not reentrant (minor)
51 |
52 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
53 |
54 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it due to some
56 | non-differentiable points?
57 |
58 | Update: all gradient check passes with double precision.
59 |
60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
61 | float, `<1e-15` for double),
62 | so it may not be a serious problem (?)
63 |
64 | Please post an issue or PR if you have any comments.
65 |
--------------------------------------------------------------------------------
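
The examples above assume a CUDA build (`.cuda()` everywhere). As a quick sanity check after running `./make.sh`, a device-agnostic variant can be used; this is a sketch that only assumes the `DCN` module exported by `dcn_v2.py`.

```python
# Hedged sanity-check sketch: run the DCN layer from dcn_v2.py on whatever
# device is available. Assumes the extension has been built via ./make.sh
# so that `from dcn_v2 import DCN` succeeds.
import torch
from dcn_v2 import DCN

device = "cuda" if torch.cuda.is_available() else "cpu"

x = torch.randn(2, 64, 128, 128, device=device)
dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1,
          deformable_groups=2).to(device)
y = dcn(x)
print(y.shape)   # expected: torch.Size([2, 64, 128, 128])
```
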
/lib/DCNv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/DCNv2/__init__.py
--------------------------------------------------------------------------------
/lib/DCNv2/_ext.cp38-win_amd64.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/DCNv2/_ext.cp38-win_amd64.pyd
--------------------------------------------------------------------------------
/lib/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | python setup.py build develop
3 |
--------------------------------------------------------------------------------
/lib/DCNv2/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import glob
4 | import os
5 | import sys
6 |
7 | import torch
8 | from setuptools import find_packages, setup
9 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension
10 |
11 | requirements = ["torch", "torchvision"]
12 |
13 |
14 | def get_extensions():
15 | this_dir = os.path.dirname(os.path.abspath(__file__))
16 | extensions_dir = os.path.join(this_dir, "src")
17 |
18 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
19 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
20 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
21 |
22 | os.environ["CC"] = "g++"
23 | sources = main_file + source_cpu
24 | extension = CppExtension
25 | extra_compile_args = {"cxx": []}
26 | define_macros = []
27 |
28 | if torch.cuda.is_available() and CUDA_HOME is not None:
29 | extension = CUDAExtension
30 | sources += source_cuda
31 | define_macros += [("WITH_CUDA", None)]
32 | extra_compile_args["nvcc"] = [
33 | "-DCUDA_HAS_FP16=1",
34 | "-D__CUDA_NO_HALF_OPERATORS__",
35 | "-D__CUDA_NO_HALF_CONVERSIONS__",
36 | "-D__CUDA_NO_HALF2_OPERATORS__",
37 | ]
38 | else:
39 | # raise NotImplementedError('Cuda is not available')
40 | pass
41 |
42 | extra_compile_args['cxx'].append('-fopenmp')
43 |
44 | sources = [os.path.join(extensions_dir, s) for s in sources]
45 | include_dirs = [extensions_dir]
46 | ext_modules = [
47 | extension(
48 | "_ext",
49 | sources,
50 | include_dirs=include_dirs,
51 | define_macros=define_macros,
52 | extra_compile_args=extra_compile_args,
53 | )
54 | ]
55 | return ext_modules
56 |
57 |
58 | setup(
59 | name="DCNv2",
60 | version="0.1",
61 | author="charlesshang",
62 | url="https://github.com/charlesshang/DCNv2",
63 | description="deformable convolutional networks",
64 | packages=find_packages(
65 | exclude=(
66 | "configs",
67 | "tests",
68 | )
69 | ),
70 | # install_requires=requirements,
71 | ext_modules=get_extensions(),
72 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
73 | )
74 |
--------------------------------------------------------------------------------
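
`get_extensions()` above switches between `CppExtension` and `CUDAExtension` depending on whether a CUDA runtime and toolkit are both present. To see which variant will be built on a given machine, the same check can be reproduced standalone (a sketch, not part of the build):

```python
# Reproduce setup.py's build-variant check: CUDAExtension is chosen only when
# torch sees a GPU *and* a CUDA toolkit (CUDA_HOME) is installed.
import torch
from torch.utils.cpp_extension import CUDA_HOME

if torch.cuda.is_available() and CUDA_HOME is not None:
    print("CUDAExtension will be built (WITH_CUDA defined), CUDA_HOME =", CUDA_HOME)
else:
    print("CppExtension (CPU-only) will be built")
```
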
/lib/DCNv2/src/cpu/dcn_v2_im2col_cpu.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 | // modified from the CUDA version for CPU use by Daniel K. Suhendro
64 |
65 | #ifndef DCN_V2_IM2COL_CPU
66 | #define DCN_V2_IM2COL_CPU
67 |
68 | #ifdef __cplusplus
69 | extern "C"
70 | {
71 | #endif
72 |
73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
88 | const int batch_size, const int channels, const int height_im, const int width_im,
89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
91 | const int dilation_h, const int dilation_w,
92 | const int deformable_group,
93 | float *grad_offset, float *grad_mask);
94 |
95 | #ifdef __cplusplus
96 | }
97 | #endif
98 |
99 | #endif
--------------------------------------------------------------------------------
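
The `modulated_deformable_im2col_*` routines declared above generalize the classic im2col transform: instead of reading each kernel tap from a fixed grid position, the deformable version bilinearly samples at a learned offset and scales the sample by a mask before writing it into the column matrix. For orientation, the plain (non-deformable) im2col step corresponds to `torch.nn.functional.unfold`, as in this small sketch:

```python
# Plain im2col for reference: unfold turns each 3x3 receptive field into a
# column, so convolution becomes one matrix multiplication. The deformable
# version declared in this header additionally shifts every sampling location
# by a learned offset and scales it by a mask.
import torch
import torch.nn.functional as F

x = torch.randn(1, 8, 16, 16)                   # (N, C, H, W)
cols = F.unfold(x, kernel_size=3, padding=1)    # (N, C*3*3, H*W)
w = torch.randn(4, 8, 3, 3)                     # (outC, C, kH, kW)
out = (w.view(4, -1) @ cols).view(1, 4, 16, 16)

# same result as a direct convolution (up to floating-point error)
assert torch.allclose(out, F.conv2d(x, w, padding=1), atol=1e-4)
print(out.shape)
```
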
/lib/DCNv2/src/cpu/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cpu_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cpu_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
1 |
2 | /*!
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
4 | *
5 | * COPYRIGHT
6 | *
7 | * All contributions by the University of California:
8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
9 | * All rights reserved.
10 | *
11 | * All other contributions:
12 | * Copyright (c) 2014-2017, the respective contributors
13 | * All rights reserved.
14 | *
15 | * Caffe uses a shared copyright model: each contributor holds copyright over
16 | * their contributions to Caffe. The project versioning records all such
17 | * contribution and copyright details. If a contributor wants to further mark
18 | * their specific copyright on a particular contribution, they should indicate
19 | * their copyright solely in the commit message of the change when it is
20 | * committed.
21 | *
22 | * LICENSE
23 | *
24 | * Redistribution and use in source and binary forms, with or without
25 | * modification, are permitted provided that the following conditions are met:
26 | *
27 | * 1. Redistributions of source code must retain the above copyright notice, this
28 | * list of conditions and the following disclaimer.
29 | * 2. Redistributions in binary form must reproduce the above copyright notice,
30 | * this list of conditions and the following disclaimer in the documentation
31 | * and/or other materials provided with the distribution.
32 | *
33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 | *
44 | * CONTRIBUTION AGREEMENT
45 | *
46 | * By contributing to the BVLC/caffe repository through pull-request, comment,
47 | * or otherwise, the contributor releases their content to the
48 | * license and copyright terms herein.
49 | *
50 | ***************** END Caffe Copyright Notice and Disclaimer ********************
51 | *
52 | * Copyright (c) 2018 Microsoft
53 | * Licensed under The MIT License [see LICENSE for details]
54 | * \file modulated_deformable_im2col.h
55 | * \brief Function definitions of converting an image to
56 | * column matrix based on kernel, padding, dilation, and offset.
57 | * These functions are mainly used in deformable convolution operators.
58 | * \ref: https://arxiv.org/abs/1811.11168
59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
60 | */
61 |
62 | /***************** Adapted by Charles Shang *********************/
63 |
64 | #ifndef DCN_V2_IM2COL_CUDA
65 | #define DCN_V2_IM2COL_CUDA
66 |
67 | #ifdef __cplusplus
68 | extern "C"
69 | {
70 | #endif
71 |
72 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
73 | const float *data_im, const float *data_offset, const float *data_mask,
74 | const int batch_size, const int channels, const int height_im, const int width_im,
75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
77 | const int dilation_h, const int dilation_w,
78 | const int deformable_group, float *data_col);
79 |
80 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
81 | const float *data_col, const float *data_offset, const float *data_mask,
82 | const int batch_size, const int channels, const int height_im, const int width_im,
83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
85 | const int dilation_h, const int dilation_w,
86 | const int deformable_group, float *grad_im);
87 |
88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
90 | const int batch_size, const int channels, const int height_im, const int width_im,
91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w,
92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
93 | const int dilation_h, const int dilation_w,
94 | const int deformable_group,
95 | float *grad_offset, float *grad_mask);
96 |
97 | #ifdef __cplusplus
98 | }
99 | #endif
100 |
101 | #endif
--------------------------------------------------------------------------------
/lib/DCNv2/src/cuda/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 |
4 | at::Tensor
5 | dcn_v2_cuda_forward(const at::Tensor &input,
6 | const at::Tensor &weight,
7 | const at::Tensor &bias,
8 | const at::Tensor &offset,
9 | const at::Tensor &mask,
10 | const int kernel_h,
11 | const int kernel_w,
12 | const int stride_h,
13 | const int stride_w,
14 | const int pad_h,
15 | const int pad_w,
16 | const int dilation_h,
17 | const int dilation_w,
18 | const int deformable_group);
19 |
20 | std::vector<at::Tensor>
21 | dcn_v2_cuda_backward(const at::Tensor &input,
22 | const at::Tensor &weight,
23 | const at::Tensor &bias,
24 | const at::Tensor &offset,
25 | const at::Tensor &mask,
26 | const at::Tensor &grad_output,
27 | int kernel_h, int kernel_w,
28 | int stride_h, int stride_w,
29 | int pad_h, int pad_w,
30 | int dilation_h, int dilation_w,
31 | int deformable_group);
32 |
33 |
34 | std::tuple<at::Tensor, at::Tensor>
35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input,
36 | const at::Tensor &bbox,
37 | const at::Tensor &trans,
38 | const int no_trans,
39 | const float spatial_scale,
40 | const int output_dim,
41 | const int group_size,
42 | const int pooled_size,
43 | const int part_size,
44 | const int sample_per_part,
45 | const float trans_std);
46 |
47 | std::tuple<at::Tensor, at::Tensor>
48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad,
49 | const at::Tensor &input,
50 | const at::Tensor &bbox,
51 | const at::Tensor &trans,
52 | const at::Tensor &top_count,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
--------------------------------------------------------------------------------
/lib/DCNv2/src/dcn_v2.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "cpu/vision.h"
4 |
5 | #ifdef WITH_CUDA
6 | #include "cuda/vision.h"
7 | #endif
8 |
9 | at::Tensor
10 | dcn_v2_forward(const at::Tensor &input,
11 | const at::Tensor &weight,
12 | const at::Tensor &bias,
13 | const at::Tensor &offset,
14 | const at::Tensor &mask,
15 | const int kernel_h,
16 | const int kernel_w,
17 | const int stride_h,
18 | const int stride_w,
19 | const int pad_h,
20 | const int pad_w,
21 | const int dilation_h,
22 | const int dilation_w,
23 | const int deformable_group)
24 | {
25 | if (input.is_cuda())
26 | {
27 | #ifdef WITH_CUDA
28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask,
29 | kernel_h, kernel_w,
30 | stride_h, stride_w,
31 | pad_h, pad_w,
32 | dilation_h, dilation_w,
33 | deformable_group);
34 | #else
35 | AT_ERROR("Not compiled with GPU support");
36 | #endif
37 | }
38 | else{
39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask,
40 | kernel_h, kernel_w,
41 | stride_h, stride_w,
42 | pad_h, pad_w,
43 | dilation_h, dilation_w,
44 | deformable_group);
45 | }
46 | }
47 |
48 | std::vector<at::Tensor>
49 | dcn_v2_backward(const at::Tensor &input,
50 | const at::Tensor &weight,
51 | const at::Tensor &bias,
52 | const at::Tensor &offset,
53 | const at::Tensor &mask,
54 | const at::Tensor &grad_output,
55 | int kernel_h, int kernel_w,
56 | int stride_h, int stride_w,
57 | int pad_h, int pad_w,
58 | int dilation_h, int dilation_w,
59 | int deformable_group)
60 | {
61 | if (input.is_cuda())
62 | {
63 | #ifdef WITH_CUDA
64 | return dcn_v2_cuda_backward(input,
65 | weight,
66 | bias,
67 | offset,
68 | mask,
69 | grad_output,
70 | kernel_h, kernel_w,
71 | stride_h, stride_w,
72 | pad_h, pad_w,
73 | dilation_h, dilation_w,
74 | deformable_group);
75 | #else
76 | AT_ERROR("Not compiled with GPU support");
77 | #endif
78 | }
79 | else{
80 | return dcn_v2_cpu_backward(input,
81 | weight,
82 | bias,
83 | offset,
84 | mask,
85 | grad_output,
86 | kernel_h, kernel_w,
87 | stride_h, stride_w,
88 | pad_h, pad_w,
89 | dilation_h, dilation_w,
90 | deformable_group);
91 | }
92 | }
93 |
94 | std::tuple<at::Tensor, at::Tensor>
95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input,
96 | const at::Tensor &bbox,
97 | const at::Tensor &trans,
98 | const int no_trans,
99 | const float spatial_scale,
100 | const int output_dim,
101 | const int group_size,
102 | const int pooled_size,
103 | const int part_size,
104 | const int sample_per_part,
105 | const float trans_std)
106 | {
107 | if (input.is_cuda())
108 | {
109 | #ifdef WITH_CUDA
110 | return dcn_v2_psroi_pooling_cuda_forward(input,
111 | bbox,
112 | trans,
113 | no_trans,
114 | spatial_scale,
115 | output_dim,
116 | group_size,
117 | pooled_size,
118 | part_size,
119 | sample_per_part,
120 | trans_std);
121 | #else
122 | AT_ERROR("Not compiled with GPU support");
123 | #endif
124 | }
125 | else{
126 | return dcn_v2_psroi_pooling_cpu_forward(input,
127 | bbox,
128 | trans,
129 | no_trans,
130 | spatial_scale,
131 | output_dim,
132 | group_size,
133 | pooled_size,
134 | part_size,
135 | sample_per_part,
136 | trans_std);
137 | }
138 | }
139 |
140 | std::tuple<at::Tensor, at::Tensor>
141 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad,
142 | const at::Tensor &input,
143 | const at::Tensor &bbox,
144 | const at::Tensor &trans,
145 | const at::Tensor &top_count,
146 | const int no_trans,
147 | const float spatial_scale,
148 | const int output_dim,
149 | const int group_size,
150 | const int pooled_size,
151 | const int part_size,
152 | const int sample_per_part,
153 | const float trans_std)
154 | {
155 | if (input.is_cuda())
156 | {
157 | #ifdef WITH_CUDA
158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad,
159 | input,
160 | bbox,
161 | trans,
162 | top_count,
163 | no_trans,
164 | spatial_scale,
165 | output_dim,
166 | group_size,
167 | pooled_size,
168 | part_size,
169 | sample_per_part,
170 | trans_std);
171 | #else
172 | AT_ERROR("Not compiled with GPU support");
173 | #endif
174 | }
175 | else{
176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad,
177 | input,
178 | bbox,
179 | trans,
180 | top_count,
181 | no_trans,
182 | spatial_scale,
183 | output_dim,
184 | group_size,
185 | pooled_size,
186 | part_size,
187 | sample_per_part,
188 | trans_std);
189 | }
190 | }
--------------------------------------------------------------------------------
/lib/DCNv2/src/vision.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "dcn_v2.h"
3 |
4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward");
6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward");
7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward");
8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward");
9 | }
10 |
--------------------------------------------------------------------------------
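
`vision.cpp` exposes the four operators above through pybind11 in an extension module named `_ext` (see `setup.py`); the Python wrappers in `dcn_v2.py` call them. The sketch below only illustrates the argument layout declared in `dcn_v2.h` for the forward operator, assuming the built extension is importable as `_ext`; normally you would go through `DCN`/`dcn_v2_conv` instead.

```python
# Shape-level sketch of calling the bound forward op directly (not the usual
# entry point). Argument order follows dcn_v2_forward in dcn_v2.h.
import torch
import _ext  # assumes the compiled extension is on the import path

N, inC, outC, H, W = 2, 2, 2, 4, 4
kH = kW = 3
deformable_groups = 1

inp    = torch.randn(N, inC, H, W)
weight = torch.randn(outC, inC, kH, kW)
bias   = torch.randn(outC)
# offsets: 2 values (dy, dx) per kernel tap; mask: 1 value per tap
offset = torch.zeros(N, deformable_groups * 2 * kH * kW, H, W)
mask   = torch.ones(N, deformable_groups * kH * kW, H, W)

out = _ext.dcn_v2_forward(inp, weight, bias, offset, mask,
                          kH, kW,   # kernel_h, kernel_w
                          1, 1,     # stride_h, stride_w
                          1, 1,     # pad_h, pad_w
                          1, 1,     # dilation_h, dilation_w
                          deformable_groups)
print(out.shape)   # (N, outC, H, W) with stride 1 and padding 1
```
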
/lib/DCNv2/test/test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import, division, print_function
3 |
4 | import torch
5 | import torch.nn as nn
6 | from torch.autograd import gradcheck
7 |
8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling
9 |
10 | deformable_groups = 1
11 | N, inC, inH, inW = 2, 2, 4, 4
12 | outC = 2
13 | kH, kW = 3, 3
14 |
15 |
16 | def conv_identify(weight, bias):
17 | weight.data.zero_()
18 | bias.data.zero_()
19 | o, i, h, w = weight.shape
20 | y = h // 2
21 | x = w // 2
22 | for p in range(i):
23 | for q in range(o):
24 | if p == q:
25 | weight.data[q, p, y, x] = 1.0
26 |
27 |
28 | def check_zero_offset():
29 | conv_offset = nn.Conv2d(
30 | inC,
31 | deformable_groups * 2 * kH * kW,
32 | kernel_size=(kH, kW),
33 | stride=(1, 1),
34 | padding=(1, 1),
35 | bias=True,
36 | ).cuda()
37 |
38 | conv_mask = nn.Conv2d(
39 | inC,
40 | deformable_groups * 1 * kH * kW,
41 | kernel_size=(kH, kW),
42 | stride=(1, 1),
43 | padding=(1, 1),
44 | bias=True,
45 | ).cuda()
46 |
47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups).cuda()
48 |
49 | conv_offset.weight.data.zero_()
50 | conv_offset.bias.data.zero_()
51 | conv_mask.weight.data.zero_()
52 | conv_mask.bias.data.zero_()
53 | conv_identify(dcn_v2.weight, dcn_v2.bias)
54 |
55 | input = torch.randn(N, inC, inH, inW).cuda()
56 | offset = conv_offset(input)
57 | mask = conv_mask(input)
58 | mask = torch.sigmoid(mask)
59 | output = dcn_v2(input, offset, mask)
60 | output *= 2
61 | d = (input - output).abs().max()
62 | if d < 1e-10:
63 | print("Zero offset passed")
64 | else:
65 | print("Zero offset failed")
66 | print(input)
67 | print(output)
68 |
69 |
70 | def check_gradient_dconv():
71 |
72 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01
73 | input.requires_grad = True
74 |
75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2
76 | # offset.data.zero_()
77 | # offset.data -= 0.5
78 | offset.requires_grad = True
79 |
80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
81 | # mask.data.zero_()
82 | mask.requires_grad = True
83 | mask = torch.sigmoid(mask)
84 |
85 | weight = torch.randn(outC, inC, kH, kW).cuda()
86 | weight.requires_grad = True
87 |
88 | bias = torch.rand(outC).cuda()
89 | bias.requires_grad = True
90 |
91 | stride = 1
92 | padding = 1
93 | dilation = 1
94 |
95 | print(
96 | "check_gradient_dconv: ",
97 | gradcheck(
98 | dcn_v2_conv,
99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups),
100 | eps=1e-3,
101 | atol=1e-4,
102 | rtol=1e-2,
103 | ),
104 | )
105 |
106 |
107 | def check_pooling_zero_offset():
108 |
109 | input = torch.randn(2, 16, 64, 64).cuda().zero_()
110 | input[0, :, 16:26, 16:26] = 1.0
111 | input[1, :, 10:20, 20:30] = 2.0
112 | rois = (
113 | torch.tensor(
114 | [
115 | [0, 65, 65, 103, 103],
116 | [1, 81, 41, 119, 79],
117 | ]
118 | )
119 | .cuda()
120 | .float()
121 | )
122 | pooling = DCNv2Pooling(
123 | spatial_scale=1.0 / 4,
124 | pooled_size=7,
125 | output_dim=16,
126 | no_trans=True,
127 | group_size=1,
128 | trans_std=0.0,
129 | ).cuda()
130 |
131 | out = pooling(input, rois, input.new())
132 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])])
133 | print(s)
134 |
135 | dpooling = DCNv2Pooling(
136 | spatial_scale=1.0 / 4,
137 | pooled_size=7,
138 | output_dim=16,
139 | no_trans=False,
140 | group_size=1,
141 | trans_std=0.0,
142 | ).cuda()
143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_()
144 | dout = dpooling(input, rois, offset)
145 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])])
146 | print(s)
147 |
148 |
149 | def check_gradient_dpooling():
150 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01
151 | N = 4
152 | batch_inds = torch.randint(2, (N, 1)).cuda().float()
153 | x = torch.rand((N, 1)).cuda().float() * 15
154 | y = torch.rand((N, 1)).cuda().float() * 15
155 | w = torch.rand((N, 1)).cuda().float() * 10
156 | h = torch.rand((N, 1)).cuda().float() * 10
157 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
158 | offset = torch.randn(N, 2, 3, 3).cuda()
159 | input.requires_grad = True
160 | offset.requires_grad = True
161 |
162 | spatial_scale = 1.0 / 4
163 | pooled_size = 3
164 | output_dim = 3
165 | no_trans = 0
166 | group_size = 1
167 | trans_std = 0.0
168 | sample_per_part = 4
169 | part_size = pooled_size
170 |
171 | print(
172 | "check_gradient_dpooling:",
173 | gradcheck(
174 | dcn_v2_pooling,
175 | (
176 | input,
177 | rois,
178 | offset,
179 | spatial_scale,
180 | pooled_size,
181 | output_dim,
182 | no_trans,
183 | group_size,
184 | part_size,
185 | sample_per_part,
186 | trans_std,
187 | ),
188 | eps=1e-4,
189 | ),
190 | )
191 |
192 |
193 | def example_dconv():
194 | input = torch.randn(2, 64, 128, 128).cuda()
195 | # wrap all things (offset and mask) in DCN
196 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda()
197 | # print(dcn.weight.shape, input.shape)
198 | output = dcn(input)
199 | targert = output.new(*output.size())
200 | targert.data.uniform_(-0.01, 0.01)
201 | error = (targert - output).mean()
202 | error.backward()
203 | print(output.shape)
204 |
205 |
206 | def example_dpooling():
207 | input = torch.randn(2, 32, 64, 64).cuda()
208 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
209 | x = torch.randint(256, (20, 1)).cuda().float()
210 | y = torch.randint(256, (20, 1)).cuda().float()
211 | w = torch.randint(64, (20, 1)).cuda().float()
212 | h = torch.randint(64, (20, 1)).cuda().float()
213 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
214 | offset = torch.randn(20, 2, 7, 7).cuda()
215 | input.requires_grad = True
216 | offset.requires_grad = True
217 |
218 | # normal roi_align
219 | pooling = DCNv2Pooling(
220 | spatial_scale=1.0 / 4,
221 | pooled_size=7,
222 | output_dim=32,
223 | no_trans=True,
224 | group_size=1,
225 | trans_std=0.1,
226 | ).cuda()
227 |
228 | # deformable pooling
229 | dpooling = DCNv2Pooling(
230 | spatial_scale=1.0 / 4,
231 | pooled_size=7,
232 | output_dim=32,
233 | no_trans=False,
234 | group_size=1,
235 | trans_std=0.1,
236 | ).cuda()
237 |
238 | out = pooling(input, rois, offset)
239 | dout = dpooling(input, rois, offset)
240 | print(out.shape)
241 | print(dout.shape)
242 |
243 | target_out = out.new(*out.size())
244 | target_out.data.uniform_(-0.01, 0.01)
245 | target_dout = dout.new(*dout.size())
246 | target_dout.data.uniform_(-0.01, 0.01)
247 | e = (target_out - out).mean()
248 | e.backward()
249 | e = (target_dout - dout).mean()
250 | e.backward()
251 |
252 |
253 | def example_mdpooling():
254 | input = torch.randn(2, 32, 64, 64).cuda()
255 | input.requires_grad = True
256 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
257 | x = torch.randint(256, (20, 1)).cuda().float()
258 | y = torch.randint(256, (20, 1)).cuda().float()
259 | w = torch.randint(64, (20, 1)).cuda().float()
260 | h = torch.randint(64, (20, 1)).cuda().float()
261 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
262 |
263 | # modulated deformable pooling (V2)
264 | dpooling = DCNPooling(
265 | spatial_scale=1.0 / 4,
266 | pooled_size=7,
267 | output_dim=32,
268 | no_trans=False,
269 | group_size=1,
270 | trans_std=0.1,
271 | deform_fc_dim=1024,
272 | ).cuda()
273 |
274 | dout = dpooling(input, rois)
275 | target = dout.new(*dout.size())
276 | target.data.uniform_(-0.1, 0.1)
277 | error = (target - dout).mean()
278 | error.backward()
279 | print(dout.shape)
280 |
281 |
282 | if __name__ == "__main__":
283 |
284 | example_dconv()
285 | example_dpooling()
286 | example_mdpooling()
287 |
288 | check_pooling_zero_offset()
289 | # zero offset check
290 | if inC == outC:
291 | check_zero_offset()
292 |
293 | check_gradient_dpooling()
294 | check_gradient_dconv()
295 | # """
296 | # ****** Note: backward is not reentrant error may not be a serious problem,
297 | # ****** since the max error is less than 1e-7,
298 | # ****** Still looking for what trigger this problem
299 | # """
300 |
--------------------------------------------------------------------------------
/lib/DCNv2/test/testcpu.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import, division, print_function
3 |
4 | import torch
5 | import torch.nn as nn
6 | from torch.autograd import gradcheck
7 |
8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling
9 |
10 | deformable_groups = 1
11 | N, inC, inH, inW = 2, 2, 4, 4
12 | outC = 2
13 | kH, kW = 3, 3
14 |
15 |
16 | def conv_identify(weight, bias):
17 | weight.data.zero_()
18 | bias.data.zero_()
19 | o, i, h, w = weight.shape
20 | y = h // 2
21 | x = w // 2
22 | for p in range(i):
23 | for q in range(o):
24 | if p == q:
25 | weight.data[q, p, y, x] = 1.0
26 |
27 |
28 | def check_zero_offset():
29 | conv_offset = nn.Conv2d(
30 | inC,
31 | deformable_groups * 2 * kH * kW,
32 | kernel_size=(kH, kW),
33 | stride=(1, 1),
34 | padding=(1, 1),
35 | bias=True,
36 | )
37 |
38 | conv_mask = nn.Conv2d(
39 | inC,
40 | deformable_groups * 1 * kH * kW,
41 | kernel_size=(kH, kW),
42 | stride=(1, 1),
43 | padding=(1, 1),
44 | bias=True,
45 | )
46 |
47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
48 |
49 | conv_offset.weight.data.zero_()
50 | conv_offset.bias.data.zero_()
51 | conv_mask.weight.data.zero_()
52 | conv_mask.bias.data.zero_()
53 | conv_identify(dcn_v2.weight, dcn_v2.bias)
54 |
55 | input = torch.randn(N, inC, inH, inW)
56 | offset = conv_offset(input)
57 | mask = conv_mask(input)
58 | mask = torch.sigmoid(mask)
59 | output = dcn_v2(input, offset, mask)
60 | output *= 2
61 | d = (input - output).abs().max()
62 | if d < 1e-10:
63 | print("Zero offset passed")
64 | else:
65 | print("Zero offset failed")
66 | print(input)
67 | print(output)
68 |
69 |
70 | def check_gradient_dconv():
71 |
72 | input = torch.rand(N, inC, inH, inW) * 0.01
73 | input.requires_grad = True
74 |
75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2
76 | # offset.data.zero_()
77 | # offset.data -= 0.5
78 | offset.requires_grad = True
79 |
80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW)
81 | # mask.data.zero_()
82 | mask.requires_grad = True
83 | mask = torch.sigmoid(mask)
84 |
85 | weight = torch.randn(outC, inC, kH, kW)
86 | weight.requires_grad = True
87 |
88 | bias = torch.rand(outC)
89 | bias.requires_grad = True
90 |
91 | stride = 1
92 | padding = 1
93 | dilation = 1
94 |
95 | print(
96 | "check_gradient_dconv: ",
97 | gradcheck(
98 | dcn_v2_conv,
99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups),
100 | eps=1e-3,
101 | atol=1e-4,
102 | rtol=1e-2,
103 | ),
104 | )
105 |
106 |
107 | def check_pooling_zero_offset():
108 |
109 | input = torch.randn(2, 16, 64, 64).zero_()
110 | input[0, :, 16:26, 16:26] = 1.0
111 | input[1, :, 10:20, 20:30] = 2.0
112 | rois = torch.tensor(
113 | [
114 | [0, 65, 65, 103, 103],
115 | [1, 81, 41, 119, 79],
116 | ]
117 | ).float()
118 | pooling = DCNv2Pooling(
119 | spatial_scale=1.0 / 4,
120 | pooled_size=7,
121 | output_dim=16,
122 | no_trans=True,
123 | group_size=1,
124 | trans_std=0.0,
125 | )
126 |
127 | out = pooling(input, rois, input.new())
128 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])])
129 | print(s)
130 |
131 | dpooling = DCNv2Pooling(
132 | spatial_scale=1.0 / 4,
133 | pooled_size=7,
134 | output_dim=16,
135 | no_trans=False,
136 | group_size=1,
137 | trans_std=0.0,
138 | )
139 | offset = torch.randn(20, 2, 7, 7).zero_()
140 | dout = dpooling(input, rois, offset)
141 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])])
142 | print(s)
143 |
144 |
145 | def check_gradient_dpooling():
146 | input = torch.randn(2, 3, 5, 5) * 0.01
147 | N = 4
148 | batch_inds = torch.randint(2, (N, 1)).float()
149 | x = torch.rand((N, 1)).float() * 15
150 | y = torch.rand((N, 1)).float() * 15
151 | w = torch.rand((N, 1)).float() * 10
152 | h = torch.rand((N, 1)).float() * 10
153 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
154 | offset = torch.randn(N, 2, 3, 3)
155 | input.requires_grad = True
156 | offset.requires_grad = True
157 |
158 | spatial_scale = 1.0 / 4
159 | pooled_size = 3
160 | output_dim = 3
161 | no_trans = 0
162 | group_size = 1
163 | trans_std = 0.0
164 | sample_per_part = 4
165 | part_size = pooled_size
166 |
167 | print(
168 | "check_gradient_dpooling:",
169 | gradcheck(
170 | dcn_v2_pooling,
171 | (
172 | input,
173 | rois,
174 | offset,
175 | spatial_scale,
176 | pooled_size,
177 | output_dim,
178 | no_trans,
179 | group_size,
180 | part_size,
181 | sample_per_part,
182 | trans_std,
183 | ),
184 | eps=1e-4,
185 | ),
186 | )
187 |
188 |
189 | def example_dconv():
190 | input = torch.randn(2, 64, 128, 128)
191 | # wrap all things (offset and mask) in DCN
192 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2)
193 | # print(dcn.weight.shape, input.shape)
194 | output = dcn(input)
195 | targert = output.new(*output.size())
196 | targert.data.uniform_(-0.01, 0.01)
197 | error = (targert - output).mean()
198 | error.backward()
199 | print(output.shape)
200 |
201 |
202 | def example_dpooling():
203 | input = torch.randn(2, 32, 64, 64)
204 | batch_inds = torch.randint(2, (20, 1)).float()
205 | x = torch.randint(256, (20, 1)).float()
206 | y = torch.randint(256, (20, 1)).float()
207 | w = torch.randint(64, (20, 1)).float()
208 | h = torch.randint(64, (20, 1)).float()
209 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
210 | offset = torch.randn(20, 2, 7, 7)
211 | input.requires_grad = True
212 | offset.requires_grad = True
213 |
214 | # normal roi_align
215 | pooling = DCNv2Pooling(
216 | spatial_scale=1.0 / 4,
217 | pooled_size=7,
218 | output_dim=32,
219 | no_trans=True,
220 | group_size=1,
221 | trans_std=0.1,
222 | )
223 |
224 | # deformable pooling
225 | dpooling = DCNv2Pooling(
226 | spatial_scale=1.0 / 4,
227 | pooled_size=7,
228 | output_dim=32,
229 | no_trans=False,
230 | group_size=1,
231 | trans_std=0.1,
232 | )
233 |
234 | out = pooling(input, rois, offset)
235 | dout = dpooling(input, rois, offset)
236 | print(out.shape)
237 | print(dout.shape)
238 |
239 | target_out = out.new(*out.size())
240 | target_out.data.uniform_(-0.01, 0.01)
241 | target_dout = dout.new(*dout.size())
242 | target_dout.data.uniform_(-0.01, 0.01)
243 | e = (target_out - out).mean()
244 | e.backward()
245 | e = (target_dout - dout).mean()
246 | e.backward()
247 |
248 |
249 | def example_mdpooling():
250 | input = torch.randn(2, 32, 64, 64)
251 | input.requires_grad = True
252 | batch_inds = torch.randint(2, (20, 1)).float()
253 | x = torch.randint(256, (20, 1)).float()
254 | y = torch.randint(256, (20, 1)).float()
255 | w = torch.randint(64, (20, 1)).float()
256 | h = torch.randint(64, (20, 1)).float()
257 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
258 |
259 | # modulated deformable pooling (V2)
260 | dpooling = DCNPooling(
261 | spatial_scale=1.0 / 4,
262 | pooled_size=7,
263 | output_dim=32,
264 | no_trans=False,
265 | group_size=1,
266 | trans_std=0.1,
267 | deform_fc_dim=1024,
268 | )
269 |
270 | dout = dpooling(input, rois)
271 | target = dout.new(*dout.size())
272 | target.data.uniform_(-0.1, 0.1)
273 | error = (target - dout).mean()
274 | error.backward()
275 | print(dout.shape)
276 |
277 |
278 | if __name__ == "__main__":
279 |
280 | example_dconv()
281 | example_dpooling()
282 | example_mdpooling()
283 |
284 | check_pooling_zero_offset()
285 | # zero offset check
286 | if inC == outC:
287 | check_zero_offset()
288 |
289 | check_gradient_dpooling()
290 | check_gradient_dconv()
291 | # """
292 | # ****** Note: backward is not reentrant error may not be a serious problem,
293 | # ****** since the max error is less than 1e-7,
294 | # ****** Still looking for what trigger this problem
295 | # """
296 |
--------------------------------------------------------------------------------
/lib/DCNv2/test/testcuda.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import, division, print_function
3 |
4 | import torch
5 | import torch.nn as nn
6 | from torch.autograd import gradcheck
7 |
8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling
9 |
10 | deformable_groups = 1
11 | N, inC, inH, inW = 2, 2, 4, 4
12 | outC = 2
13 | kH, kW = 3, 3
14 |
15 |
16 | def conv_identify(weight, bias):
17 | weight.data.zero_()
18 | bias.data.zero_()
19 | o, i, h, w = weight.shape
20 | y = h // 2
21 | x = w // 2
22 | for p in range(i):
23 | for q in range(o):
24 | if p == q:
25 | weight.data[q, p, y, x] = 1.0
26 |
27 |
28 | def check_zero_offset():
29 | conv_offset = nn.Conv2d(
30 | inC,
31 | deformable_groups * 2 * kH * kW,
32 | kernel_size=(kH, kW),
33 | stride=(1, 1),
34 | padding=(1, 1),
35 | bias=True,
36 | ).cuda()
37 |
38 | conv_mask = nn.Conv2d(
39 | inC,
40 | deformable_groups * 1 * kH * kW,
41 | kernel_size=(kH, kW),
42 | stride=(1, 1),
43 | padding=(1, 1),
44 | bias=True,
45 | ).cuda()
46 |
47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups).cuda()
48 |
49 | conv_offset.weight.data.zero_()
50 | conv_offset.bias.data.zero_()
51 | conv_mask.weight.data.zero_()
52 | conv_mask.bias.data.zero_()
53 | conv_identify(dcn_v2.weight, dcn_v2.bias)
54 |
55 | input = torch.randn(N, inC, inH, inW).cuda()
56 | offset = conv_offset(input)
57 | mask = conv_mask(input)
58 | mask = torch.sigmoid(mask)
59 | output = dcn_v2(input, offset, mask)
60 | output *= 2
61 | d = (input - output).abs().max()
62 | if d < 1e-10:
63 | print("Zero offset passed")
64 | else:
65 | print("Zero offset failed")
66 | print(input)
67 | print(output)
68 |
69 |
70 | def check_gradient_dconv():
71 |
72 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01
73 | input.requires_grad = True
74 |
75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2
76 | # offset.data.zero_()
77 | # offset.data -= 0.5
78 | offset.requires_grad = True
79 |
80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
81 | # mask.data.zero_()
82 | mask.requires_grad = True
83 | mask = torch.sigmoid(mask)
84 |
85 | weight = torch.randn(outC, inC, kH, kW).cuda()
86 | weight.requires_grad = True
87 |
88 | bias = torch.rand(outC).cuda()
89 | bias.requires_grad = True
90 |
91 | stride = 1
92 | padding = 1
93 | dilation = 1
94 |
95 | print(
96 | "check_gradient_dconv: ",
97 | gradcheck(
98 | dcn_v2_conv,
99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups),
100 | eps=1e-3,
101 | atol=1e-4,
102 | rtol=1e-2,
103 | ),
104 | )
105 |
106 |
107 | def check_pooling_zero_offset():
108 |
109 | input = torch.randn(2, 16, 64, 64).cuda().zero_()
110 | input[0, :, 16:26, 16:26] = 1.0
111 | input[1, :, 10:20, 20:30] = 2.0
112 | rois = (
113 | torch.tensor(
114 | [
115 | [0, 65, 65, 103, 103],
116 | [1, 81, 41, 119, 79],
117 | ]
118 | )
119 | .cuda()
120 | .float()
121 | )
122 | pooling = DCNv2Pooling(
123 | spatial_scale=1.0 / 4,
124 | pooled_size=7,
125 | output_dim=16,
126 | no_trans=True,
127 | group_size=1,
128 | trans_std=0.0,
129 | ).cuda()
130 |
131 | out = pooling(input, rois, input.new())
132 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])])
133 | print(s)
134 |
135 | dpooling = DCNv2Pooling(
136 | spatial_scale=1.0 / 4,
137 | pooled_size=7,
138 | output_dim=16,
139 | no_trans=False,
140 | group_size=1,
141 | trans_std=0.0,
142 | ).cuda()
143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_()
144 | dout = dpooling(input, rois, offset)
145 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])])
146 | print(s)
147 |
148 |
149 | def check_gradient_dpooling():
150 | input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01
151 | N = 4
152 | batch_inds = torch.randint(2, (N, 1)).cuda().float()
153 | x = torch.rand((N, 1)).cuda().float() * 15
154 | y = torch.rand((N, 1)).cuda().float() * 15
155 | w = torch.rand((N, 1)).cuda().float() * 10
156 | h = torch.rand((N, 1)).cuda().float() * 10
157 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
158 | offset = torch.randn(N, 2, 3, 3).cuda()
159 | input.requires_grad = True
160 | offset.requires_grad = True
161 |
162 | spatial_scale = 1.0 / 4
163 | pooled_size = 3
164 | output_dim = 3
165 | no_trans = 0
166 | group_size = 1
167 | trans_std = 0.0
168 | sample_per_part = 4
169 | part_size = pooled_size
170 |
171 | print(
172 | "check_gradient_dpooling:",
173 | gradcheck(
174 | dcn_v2_pooling,
175 | (
176 | input,
177 | rois,
178 | offset,
179 | spatial_scale,
180 | pooled_size,
181 | output_dim,
182 | no_trans,
183 | group_size,
184 | part_size,
185 | sample_per_part,
186 | trans_std,
187 | ),
188 | eps=1e-4,
189 | ),
190 | )
191 |
192 |
193 | def example_dconv():
194 | input = torch.randn(2, 64, 128, 128).cuda()
195 | # wrap all things (offset and mask) in DCN
196 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda()
197 | # print(dcn.weight.shape, input.shape)
198 | output = dcn(input)
199 | target = output.new(*output.size())
200 | target.data.uniform_(-0.01, 0.01)
201 | error = (target - output).mean()
202 | error.backward()
203 | print(output.shape)
204 |
205 |
206 | def example_dpooling():
207 | input = torch.randn(2, 32, 64, 64).cuda()
208 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
209 | x = torch.randint(256, (20, 1)).cuda().float()
210 | y = torch.randint(256, (20, 1)).cuda().float()
211 | w = torch.randint(64, (20, 1)).cuda().float()
212 | h = torch.randint(64, (20, 1)).cuda().float()
213 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
214 | offset = torch.randn(20, 2, 7, 7).cuda()
215 | input.requires_grad = True
216 | offset.requires_grad = True
217 |
218 | # normal roi_align
219 | pooling = DCNv2Pooling(
220 | spatial_scale=1.0 / 4,
221 | pooled_size=7,
222 | output_dim=32,
223 | no_trans=True,
224 | group_size=1,
225 | trans_std=0.1,
226 | ).cuda()
227 |
228 | # deformable pooling
229 | dpooling = DCNv2Pooling(
230 | spatial_scale=1.0 / 4,
231 | pooled_size=7,
232 | output_dim=32,
233 | no_trans=False,
234 | group_size=1,
235 | trans_std=0.1,
236 | ).cuda()
237 |
238 | out = pooling(input, rois, offset)
239 | dout = dpooling(input, rois, offset)
240 | print(out.shape)
241 | print(dout.shape)
242 |
243 | target_out = out.new(*out.size())
244 | target_out.data.uniform_(-0.01, 0.01)
245 | target_dout = dout.new(*dout.size())
246 | target_dout.data.uniform_(-0.01, 0.01)
247 | e = (target_out - out).mean()
248 | e.backward()
249 | e = (target_dout - dout).mean()
250 | e.backward()
251 |
252 |
253 | def example_mdpooling():
254 | input = torch.randn(2, 32, 64, 64).cuda()
255 | input.requires_grad = True
256 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
257 | x = torch.randint(256, (20, 1)).cuda().float()
258 | y = torch.randint(256, (20, 1)).cuda().float()
259 | w = torch.randint(64, (20, 1)).cuda().float()
260 | h = torch.randint(64, (20, 1)).cuda().float()
261 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
262 |
263 | # modulated deformable pooling (V2)
264 | dpooling = DCNPooling(
265 | spatial_scale=1.0 / 4,
266 | pooled_size=7,
267 | output_dim=32,
268 | no_trans=False,
269 | group_size=1,
270 | trans_std=0.1,
271 | deform_fc_dim=1024,
272 | ).cuda()
273 |
274 | dout = dpooling(input, rois)
275 | target = dout.new(*dout.size())
276 | target.data.uniform_(-0.1, 0.1)
277 | error = (target - dout).mean()
278 | error.backward()
279 | print(dout.shape)
280 |
281 |
282 | if __name__ == "__main__":
283 |
284 | example_dconv()
285 | example_dpooling()
286 | example_mdpooling()
287 |
288 | check_pooling_zero_offset()
289 | # zero offset check
290 | if inC == outC:
291 | check_zero_offset()
292 |
293 | check_gradient_dpooling()
294 | check_gradient_dconv()
295 | # """
296 | # ****** Note: the "backward is not reentrant" error may not be a serious problem,
297 | # ****** since the max error is less than 1e-7;
298 | # ****** still looking for what triggers this problem.
299 | # """
300 |
--------------------------------------------------------------------------------
/lib/cfg/crowdhuman.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "crowdhuman_train":"./data/crowdhuman.train",
6 | "crowdhuman_test":"./data/crowdhuman.val"
7 | },
8 | "test_emb":
9 | {
10 | "mot15":"./data/mot15.val"
11 | },
12 | "test":
13 | {
14 | "mot15":"./data/mot15.val"
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/lib/cfg/data.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot17.train",
6 | "caltech":"./data/caltech.all",
7 | "citypersons":"./data/citypersons.train",
8 | "cuhksysu":"./data/cuhksysu.train",
9 | "prw":"./data/prw.train",
10 | "eth":"./data/eth.train"
11 | },
12 | "test_emb":
13 | {
14 | "mot15":"./data/mot15.val"
15 | },
16 | "test":
17 | {
18 | "mot15":"./data/mot15.val"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/lib/cfg/data_all.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot17.train",
6 | "caltech":"./data/caltech.all",
7 | "citypersons":"./data/citypersons.train",
8 | "cuhksysu":"./data/cuhksysu.train",
9 | "prw":"./data/prw.train",
10 | "eth":"./data/eth.train",
11 | "crowdhuman_train":"./data/crowdhuman.train",
12 | "crowdhuman_val":"./data/crowdhuman.val"
13 | },
14 | "test_emb":
15 | {
16 | "mot15":"./data/mot15.val"
17 | },
18 | "test":
19 | {
20 | "mot15":"./data/mot15.val"
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/lib/cfg/data_half.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot17.half",
6 | "caltech":"./data/caltech.all",
7 | "citypersons":"./data/citypersons.train",
8 | "cuhksysu":"./data/cuhksysu.train",
9 | "prw":"./data/prw.train",
10 | "eth":"./data/eth.train"
11 | },
12 | "test_emb":
13 | {
14 | "mot15":"./data/mot15.val"
15 | },
16 | "test":
17 | {
18 | "mot15":"./data/mot15.val"
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/lib/cfg/mot15.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot15":"./data/mot15.train"
6 | },
7 | "test_emb":
8 | {
9 | "mot15":"./data/mot15.train"
10 | },
11 | "test":
12 | {
13 | "mot15":"./data/mot15.train"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/lib/cfg/mot16.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot16.train"
6 | },
7 | "test_emb":
8 | {
9 | "mot17":"./data/mot16.train"
10 | },
11 | "test":
12 | {
13 | "mot17":"./data/mot16.train"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/lib/cfg/mot17.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot17.train"
6 | },
7 | "test_emb":
8 | {
9 | "mot17":"./data/mot17.train"
10 | },
11 | "test":
12 | {
13 | "mot17":"./data/mot17.train"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/lib/cfg/mot17_half.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot17":"./data/mot17.half"
6 | },
7 | "test_emb":
8 | {
9 | "mot17":"./data/mot17.emb"
10 | },
11 | "test":
12 | {
13 | "mot17":"./data/mot17.val"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/lib/cfg/mot20.json:
--------------------------------------------------------------------------------
1 | {
2 | "root":"/home/zyf/dataset",
3 | "train":
4 | {
5 | "mot20":"./data/mot20.train"
6 | },
7 | "test_emb":
8 | {
9 | "mot20":"./data/mot20.train"
10 | },
11 | "test":
12 | {
13 | "mot20":"./data/mot20.train"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/lib/datasets/dataset/__pycache__/jde.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/datasets/dataset/__pycache__/jde.cpython-37.pyc
--------------------------------------------------------------------------------
/lib/datasets/dataset/__pycache__/jde.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/datasets/dataset/__pycache__/jde.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .dataset.jde import JointDataset
6 |
7 |
8 | def get_dataset(dataset, task):
9 | if task == 'mot':
10 | return JointDataset
11 | else:
12 | return None
13 |
14 |
--------------------------------------------------------------------------------
/lib/logger.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
6 | import os
7 | import time
8 | import sys
9 | import torch
10 | USE_TENSORBOARD = True
11 | try:
12 | import tensorboardX
13 | print('Using tensorboardX')
14 | except ImportError:
15 | USE_TENSORBOARD = False
16 |
17 | class Logger(object):
18 | def __init__(self, opt):
19 | """Create a summary writer logging to log_dir."""
20 | if not os.path.exists(opt.save_dir):
21 | os.makedirs(opt.save_dir)
22 | if not os.path.exists(opt.debug_dir):
23 | os.makedirs(opt.debug_dir)
24 |
25 | time_str = time.strftime('%Y-%m-%d-%H-%M')
26 |
27 | args = dict((name, getattr(opt, name)) for name in dir(opt)
28 | if not name.startswith('_'))
29 | file_name = os.path.join(opt.save_dir, 'opt.txt')
30 | with open(file_name, 'wt') as opt_file:
31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__))
32 | opt_file.write('==> cudnn version: {}\n'.format(
33 | torch.backends.cudnn.version()))
34 | opt_file.write('==> Cmd:\n')
35 | opt_file.write(str(sys.argv))
36 | opt_file.write('\n==> Opt:\n')
37 | for k, v in sorted(args.items()):
38 | opt_file.write(' %s: %s\n' % (str(k), str(v)))
39 |
40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str)
41 | if USE_TENSORBOARD:
42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
43 | else:
44 | if not os.path.exists(os.path.dirname(log_dir)):
45 | os.mkdir(os.path.dirname(log_dir))
46 | if not os.path.exists(log_dir):
47 | os.mkdir(log_dir)
48 | self.log = open(log_dir + '/log.txt', 'w')
49 | try:
50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
51 | except:
52 | pass
53 | self.start_line = True
54 |
55 | def write(self, txt):
56 | if self.start_line:
57 | time_str = time.strftime('%Y-%m-%d-%H-%M')
58 | self.log.write('{}: {}'.format(time_str, txt))
59 | else:
60 | self.log.write(txt)
61 | self.start_line = False
62 | if '\n' in txt:
63 | self.start_line = True
64 | self.log.flush()
65 |
66 | def close(self):
67 | self.log.close()
68 |
69 | def scalar_summary(self, tag, value, step):
70 | """Log a scalar variable."""
71 | if USE_TENSORBOARD:
72 | self.writer.add_scalar(tag, value, step)
73 |
--------------------------------------------------------------------------------
/lib/models/__pycache__/common.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/common.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/decode.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/decode.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/model.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/__pycache__/yolo.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/yolo.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/common.py:
--------------------------------------------------------------------------------
1 | # YOLOv5 common modules
2 |
3 | import math
4 | from copy import copy
5 | from pathlib import Path
6 |
7 | import numpy as np
8 | import pandas as pd
9 | import torch
10 | import torch.nn as nn
11 | from PIL import Image
12 | from torch.cuda import amp
13 | from lib.DCNv2.dcn_v2 import DCN
14 |
15 |
16 | def autopad(k, p=None): # kernel, padding
17 | # Pad to 'same'
18 | if p is None:
19 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
20 | return p
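  | # e.g. autopad(3) -> 1 and autopad([5, 3]) -> [2, 1], which keeps the spatial size
  | # unchanged for stride-1 convolutions ('same' padding).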
21 |
22 |
23 | def DWConv(c1, c2, k=1, s=1, act=True):
24 | # Depthwise convolution
25 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
26 |
27 |
28 | class Conv(nn.Module):
29 | # Standard convolution
30 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
31 | super(Conv, self).__init__()
32 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
33 | self.bn = nn.BatchNorm2d(c2)
34 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
35 |
36 | def forward(self, x):
37 | return self.act(self.bn(self.conv(x)))
38 |
39 | def fuseforward(self, x):
40 | return self.act(self.conv(x))
41 |
42 |
43 | class DeConv(nn.Module):
44 | # transposed-convolution (ConvTranspose2d) upsampling block
45 | def __init__(self, c1, c2, k=4, s=2):
46 | super(DeConv, self).__init__()
47 | self.deconv = nn.ConvTranspose2d(
48 | in_channels=c1,
49 | out_channels=c2,
50 | kernel_size=k,
51 | stride=s,
52 | padding=1,
53 | output_padding=0,
54 | bias=False)
55 | self.bn = nn.BatchNorm2d(c2)
56 | self.act = nn.SiLU()
57 |
58 | def forward(self, x):
59 | return self.act(self.bn(self.deconv(x)))
60 |
61 |
62 | def fill_up_weights(up):
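  | # Initialise the transposed-convolution weights as a bilinear upsampling kernel:
  | # the kernel is built once for channel 0 and copied to every other channel.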
63 | w = up.weight.data
64 | f = math.ceil(w.size(2) / 2)
65 | c = (2 * f - 1 - f % 2) / (2. * f)
66 | for i in range(w.size(2)):
67 | for j in range(w.size(3)):
68 | w[0, 0, i, j] = \
69 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
70 | for c in range(1, w.size(0)):
71 | w[c, 0, :, :] = w[0, 0, :, :]
72 |
73 |
74 | class DeConvDCN(nn.Module):
75 | # DCN layer followed by a transposed-convolution upsampling block
76 | def __init__(self, c1, c2, k=4, s=2):
77 | super(DeConvDCN, self).__init__()
78 | self.layers = []
79 | dcn = DCN(c1, c2,
80 | kernel_size=(3, 3), stride=1,
81 | padding=1, dilation=1, deformable_groups=1)
82 | deconv = nn.ConvTranspose2d(
83 | in_channels=c2,
84 | out_channels=c2,
85 | kernel_size=k,
86 | stride=s,
87 | padding=1,
88 | output_padding=0,
89 | bias=False)
90 | fill_up_weights(deconv)
91 | self.layers.append(dcn)
92 | self.layers.append(nn.BatchNorm2d(c2))
93 | self.layers.append(nn.SiLU())
94 | self.layers.append(deconv)
95 | self.layers.append(nn.BatchNorm2d(c2))
96 | self.layers.append(nn.SiLU())
97 | self.layers = nn.Sequential(*self.layers)
98 |
99 | def forward(self, x):
100 | return self.layers(x)
101 |
102 |
103 | class TransformerLayer(nn.Module):
104 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
105 | def __init__(self, c, num_heads):
106 | super().__init__()
107 | self.q = nn.Linear(c, c, bias=False)
108 | self.k = nn.Linear(c, c, bias=False)
109 | self.v = nn.Linear(c, c, bias=False)
110 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
111 | self.fc1 = nn.Linear(c, c, bias=False)
112 | self.fc2 = nn.Linear(c, c, bias=False)
113 |
114 | def forward(self, x):
115 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
116 | x = self.fc2(self.fc1(x)) + x
117 | return x
118 |
119 |
120 | class TransformerBlock(nn.Module):
121 | # Vision Transformer https://arxiv.org/abs/2010.11929
122 | def __init__(self, c1, c2, num_heads, num_layers):
123 | super().__init__()
124 | self.conv = None
125 | if c1 != c2:
126 | self.conv = Conv(c1, c2)
127 | self.linear = nn.Linear(c2, c2) # learnable position embedding
128 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)])
129 | self.c2 = c2
130 |
131 | def forward(self, x):
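  | # Flatten the H x W grid into a sequence of length w*h, add a learned position
  | # embedding, run the transformer layers, then fold the sequence back to (b, c2, w, h).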
132 | if self.conv is not None:
133 | x = self.conv(x)
134 | b, _, w, h = x.shape
135 | p = x.flatten(2)
136 | p = p.unsqueeze(0)
137 | p = p.transpose(0, 3)
138 | p = p.squeeze(3)
139 | e = self.linear(p)
140 | x = p + e
141 |
142 | x = self.tr(x)
143 | x = x.unsqueeze(3)
144 | x = x.transpose(0, 3)
145 | x = x.reshape(b, self.c2, w, h)
146 | return x
147 |
148 |
149 | class Bottleneck(nn.Module):
150 | # Standard bottleneck
151 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion
152 | super(Bottleneck, self).__init__()
153 | c_ = int(c2 * e) # hidden channels
154 | self.cv1 = Conv(c1, c_, 1, 1)
155 | self.cv2 = Conv(c_, c2, 3, 1, g=g)
156 | self.add = shortcut and c1 == c2
157 |
158 | def forward(self, x):
159 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
160 |
161 |
162 | class BottleneckCSP(nn.Module):
163 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
164 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
165 | super(BottleneckCSP, self).__init__()
166 | c_ = int(c2 * e) # hidden channels
167 | self.cv1 = Conv(c1, c_, 1, 1)
168 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
169 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
170 | self.cv4 = Conv(2 * c_, c2, 1, 1)
171 | self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3)
172 | self.act = nn.LeakyReLU(0.1, inplace=True)
173 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
174 |
175 | def forward(self, x):
176 | y1 = self.cv3(self.m(self.cv1(x)))
177 | y2 = self.cv2(x)
178 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
179 |
180 |
181 | class C3(nn.Module):
182 | # CSP Bottleneck with 3 convolutions
183 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
184 | super(C3, self).__init__()
185 | c_ = int(c2 * e) # hidden channels
186 | self.cv1 = Conv(c1, c_, 1, 1)
187 | self.cv2 = Conv(c1, c_, 1, 1)
188 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2)
189 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)])
190 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
191 |
192 | def forward(self, x):
193 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))
194 |
195 |
196 | class C3TR(C3):
197 | # C3 module with TransformerBlock()
198 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
199 | super().__init__(c1, c2, n, shortcut, g, e)
200 | c_ = int(c2 * e)
201 | self.m = TransformerBlock(c_, c_, 4, n)
202 |
203 |
204 | class SPP(nn.Module):
205 | # Spatial pyramid pooling layer used in YOLOv3-SPP
206 | def __init__(self, c1, c2, k=(5, 9, 13)):
207 | super(SPP, self).__init__()
208 | c_ = c1 // 2 # hidden channels
209 | self.cv1 = Conv(c1, c_, 1, 1)
210 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
211 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
212 |
213 | def forward(self, x):
214 | x = self.cv1(x)
215 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
216 |
217 |
218 | class Focus(nn.Module):
219 | # Focus wh information into c-space
220 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
221 | super(Focus, self).__init__()
222 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
223 | # self.contract = Contract(gain=2)
224 |
225 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
226 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
227 | # return self.conv(self.contract(x))
228 |
229 |
230 | class Contract(nn.Module):
231 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
232 | def __init__(self, gain=2):
233 | super().__init__()
234 | self.gain = gain
235 |
236 | def forward(self, x):
237 | N, C, H, W = x.size() # assert (H % s == 0) and (W % s == 0), 'Indivisible gain'
238 | s = self.gain
239 | x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2)
240 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
241 | return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40)
242 |
243 |
244 | class Expand(nn.Module):
245 | # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
246 | def __init__(self, gain=2):
247 | super().__init__()
248 | self.gain = gain
249 |
250 | def forward(self, x):
251 | N, C, H, W = x.size() # assert C % s ** 2 == 0, 'Indivisible gain'
252 | s = self.gain
253 | x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80)
254 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
255 | return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160)
256 |
257 |
258 | class Concat(nn.Module):
259 | # Concatenate a list of tensors along dimension
260 | def __init__(self, dimension=1):
261 | super(Concat, self).__init__()
262 | self.d = dimension
263 |
264 | def forward(self, x):
265 | return torch.cat(x, self.d)
--------------------------------------------------------------------------------
/lib/models/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 |
8 | from .scatter_gather import scatter_kwargs
9 |
10 | class _DataParallel(Module):
11 | r"""Implements data parallelism at the module level.
12 |
13 | This container parallelizes the application of the given module by
14 | splitting the input across the specified devices by chunking in the batch
15 | dimension. In the forward pass, the module is replicated on each device,
16 | and each replica handles a portion of the input. During the backwards
17 | pass, gradients from each replica are summed into the original module.
18 |
19 | The batch size should be larger than the number of GPUs used. It should
20 | also be an integer multiple of the number of GPUs so that each chunk is the
21 | same size (so that each GPU processes the same number of samples).
22 |
23 | See also: :ref:`cuda-nn-dataparallel-instead`
24 |
25 | Arbitrary positional and keyword inputs are allowed to be passed into
26 | DataParallel EXCEPT Tensors. All variables will be scattered on the dim
27 | specified (default 0). Primitive types will be broadcast, but all
28 | other types will be shallow-copied and can be corrupted if written to in
29 | the model's forward pass.
30 |
31 | Args:
32 | module: module to be parallelized
33 | device_ids: CUDA devices (default: all devices)
34 | output_device: device location of output (default: device_ids[0])
35 |
36 | Example::
37 |
38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
39 | >>> output = net(input_var)
40 | """
41 |
42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
43 |
44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
45 | super(_DataParallel, self).__init__()
46 |
47 | if not torch.cuda.is_available():
48 | self.module = module
49 | self.device_ids = []
50 | return
51 |
52 | if device_ids is None:
53 | device_ids = list(range(torch.cuda.device_count()))
54 | if output_device is None:
55 | output_device = device_ids[0]
56 | self.dim = dim
57 | self.module = module
58 | self.device_ids = device_ids
59 | self.chunk_sizes = chunk_sizes
60 | self.output_device = output_device
61 | if len(self.device_ids) == 1:
62 | self.module.cuda(device_ids[0])
63 |
64 | def forward(self, *inputs, **kwargs):
65 | if not self.device_ids:
66 | return self.module(*inputs, **kwargs)
67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
68 | if len(self.device_ids) == 1:
69 | return self.module(*inputs[0], **kwargs[0])
70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
71 | outputs = self.parallel_apply(replicas, inputs, kwargs)
72 | return self.gather(outputs, self.output_device)
73 |
74 | def replicate(self, module, device_ids):
75 | return replicate(module, device_ids)
76 |
77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
79 |
80 | def parallel_apply(self, replicas, inputs, kwargs):
81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
82 |
83 | def gather(self, outputs, output_device):
84 | return gather(outputs, output_device, dim=self.dim)
85 |
86 |
87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
89 |
90 | This is the functional version of the DataParallel module.
91 |
92 | Args:
93 | module: the module to evaluate in parallel
94 | inputs: inputs to the module
95 | device_ids: GPU ids on which to replicate module
96 | output_device: GPU location of the output. Use -1 to indicate the CPU.
97 | (default: device_ids[0])
98 | Returns:
99 | a Variable containing the result of module(input) located on
100 | output_device
101 | """
102 | if not isinstance(inputs, tuple):
103 | inputs = (inputs,)
104 |
105 | if device_ids is None:
106 | device_ids = list(range(torch.cuda.device_count()))
107 |
108 | if output_device is None:
109 | output_device = device_ids[0]
110 |
111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
112 | if len(device_ids) == 1:
113 | return module(*inputs[0], **module_kwargs[0])
114 | used_device_ids = device_ids[:len(inputs)]
115 | replicas = replicate(module, used_device_ids)
116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
117 | return gather(outputs, output_device, dim)
118 |
119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
120 | if chunk_sizes is None:
121 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
122 | standard_size = True
123 | for i in range(1, len(chunk_sizes)):
124 | if chunk_sizes[i] != chunk_sizes[0]:
125 | standard_size = False
126 | if standard_size:
127 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
--------------------------------------------------------------------------------
/lib/models/decode.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | from .utils import _gather_feat, _tranpose_and_gather_feat
8 |
9 | def _nms(heat, kernel=3):
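  | # Heatmap "NMS": a location survives only if it equals the max of its
  | # kernel x kernel neighbourhood (i.e. it is a local maximum); all other scores are zeroed.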
10 | pad = (kernel - 1) // 2
11 |
12 | hmax = nn.functional.max_pool2d(
13 | heat, (kernel, kernel), stride=1, padding=pad)
14 | keep = (hmax == heat).float()
15 | return heat * keep
16 |
17 |
18 | def _topk_channel(scores, K=40):
19 | batch, cat, height, width = scores.size()
20 |
21 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
22 |
23 | topk_inds = topk_inds % (height * width)
24 | topk_ys = (topk_inds / width).int().float()
25 | topk_xs = (topk_inds % width).int().float()
26 |
27 | return topk_scores, topk_inds, topk_ys, topk_xs
28 |
29 | def _topk(scores, K=40):
30 | batch, cat, height, width = scores.size()
31 |
32 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
33 |
34 | topk_inds = topk_inds % (height * width)
35 | topk_ys = (topk_inds / width).int().float()
36 | topk_xs = (topk_inds % width).int().float()
37 |
38 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
39 | topk_clses = (topk_ind / K).int()
40 | topk_inds = _gather_feat(
41 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
42 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
43 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
44 |
45 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
46 |
47 |
48 | def mot_decode(heat, wh, reg=None, ltrb=False, K=100):
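  | # Decode the network heads into detections: `heat` is the per-class centre heatmap,
  | # `wh` the box size (or left/top/right/bottom distances when ltrb=True), `reg` an
  | # optional sub-pixel centre offset, and K the number of top-scoring detections kept.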
49 | batch, cat, height, width = heat.size()
50 |
51 | # heat = torch.sigmoid(heat)
52 | # perform nms on heatmaps
53 | heat = _nms(heat)
54 |
55 | scores, inds, clses, ys, xs = _topk(heat, K=K)
56 | if reg is not None:
57 | reg = _tranpose_and_gather_feat(reg, inds)
58 | reg = reg.view(batch, K, 2)
59 | xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
60 | ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
61 | else:
62 | xs = xs.view(batch, K, 1) + 0.5
63 | ys = ys.view(batch, K, 1) + 0.5
64 | wh = _tranpose_and_gather_feat(wh, inds)
65 | if ltrb:
66 | wh = wh.view(batch, K, 4)
67 | else:
68 | wh = wh.view(batch, K, 2)
69 | clses = clses.view(batch, K, 1).float()
70 | scores = scores.view(batch, K, 1)
71 | if ltrb:
72 | bboxes = torch.cat([xs - wh[..., 0:1],
73 | ys - wh[..., 1:2],
74 | xs + wh[..., 2:3],
75 | ys + wh[..., 3:4]], dim=2)
76 | else:
77 | bboxes = torch.cat([xs - wh[..., 0:1] / 2,
78 | ys - wh[..., 1:2] / 2,
79 | xs + wh[..., 0:1] / 2,
80 | ys + wh[..., 1:2] / 2], dim=2)
81 | detections = torch.cat([bboxes, scores, clses], dim=2)
82 |
83 | return detections, inds
84 |
--------------------------------------------------------------------------------
/lib/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torchvision.models as models
6 | import torch
7 | import torch.nn as nn
8 | import os
9 |
10 | from .networks.dlav0 import get_pose_net as get_dlav0
11 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
12 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
13 | from .networks.resnet_fpn_dcn import get_pose_net as get_pose_net_fpn_dcn
14 | from .networks.pose_hrnet import get_pose_net as get_pose_net_hrnet
15 | from .networks.pose_dla_conv import get_pose_net as get_dla_conv
16 | from .yolo import get_pose_net as get_pose_net_yolo
17 |
18 | _model_factory = {
19 | 'dlav0': get_dlav0, # default DLAup
20 | 'dla': get_dla_dcn,
21 | 'dlaconv': get_dla_conv,
22 | 'resdcn': get_pose_net_dcn,
23 | 'resfpndcn': get_pose_net_fpn_dcn,
24 | 'hrnet': get_pose_net_hrnet,
25 | 'yolo': get_pose_net_yolo
26 | }
27 |
28 | def create_model(arch, heads, head_conv):
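  | # `arch` follows the '<name>_<num_layers>' convention, e.g. 'dla_34' or 'resdcn_18';
  | # names without an underscore (e.g. 'yolo') fall back to num_layers = 0.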
29 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
30 | arch = arch[:arch.find('_')] if '_' in arch else arch
31 | get_model = _model_factory[arch]
32 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv)
33 | return model
34 |
35 | def load_model(model, model_path, optimizer=None, resume=False,
36 | lr=None, lr_step=None):
37 | start_epoch = 0
38 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
39 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
40 | state_dict_ = checkpoint['state_dict']
41 | state_dict = {}
42 |
43 | # convert a DataParallel checkpoint to a plain model state dict (strip the 'module.' prefix)
44 | for k in state_dict_:
45 | if k.startswith('module') and not k.startswith('module_list'):
46 | state_dict[k[7:]] = state_dict_[k]
47 | else:
48 | state_dict[k] = state_dict_[k]
49 | model_state_dict = model.state_dict()
50 |
51 | # check loaded parameters and created model parameters
52 | msg = 'If you see this, your model does not fully load the ' + \
53 | 'pre-trained weights. Please make sure ' + \
54 | 'you have correctly specified --arch xxx ' + \
55 | 'or set the correct --num_classes for your own dataset.'
56 | for k in state_dict:
57 | if k in model_state_dict:
58 | if state_dict[k].shape != model_state_dict[k].shape:
59 | print('Skip loading parameter {}, required shape {}, '\
60 | 'loaded shape {}. {}'.format(
61 | k, model_state_dict[k].shape, state_dict[k].shape, msg))
62 | state_dict[k] = model_state_dict[k]
63 | else:
64 | print('Drop parameter {}.'.format(k) + msg)
65 | for k in model_state_dict:
66 | if not (k in state_dict):
67 | print('No param {}.'.format(k) + msg)
68 | state_dict[k] = model_state_dict[k]
69 | model.load_state_dict(state_dict, strict=False)
70 |
71 | # resume optimizer parameters
72 | if optimizer is not None and resume:
73 | if 'optimizer' in checkpoint:
74 | optimizer.load_state_dict(checkpoint['optimizer'])
75 | start_epoch = checkpoint['epoch']
76 | start_lr = lr
77 | for step in lr_step:
78 | if start_epoch >= step:
79 | start_lr *= 0.1
80 | for param_group in optimizer.param_groups:
81 | param_group['lr'] = start_lr
82 | print('Resumed optimizer with start lr', start_lr)
83 | else:
84 | print('No optimizer parameters in checkpoint.')
85 | if optimizer is not None:
86 | return model, optimizer, start_epoch
87 | else:
88 | return model
89 |
90 | def save_model(path, epoch, model, optimizer=None):
91 | if isinstance(model, torch.nn.DataParallel):
92 | state_dict = model.module.state_dict()
93 | else:
94 | state_dict = model.state_dict()
95 | data = {'epoch': epoch,
96 | 'state_dict': state_dict}
97 | if not (optimizer is None):
98 | data['optimizer'] = optimizer.state_dict()
99 | torch.save(data, path)
100 |
101 |
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/dlav0.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/dlav0.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/pose_dla_conv.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_dla_conv.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/pose_dla_dcn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_dla_dcn.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/pose_hrnet.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_hrnet.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/resnet_dcn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/resnet_dcn.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/__pycache__/resnet_fpn_dcn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/resnet_fpn_dcn.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .default import _C as cfg
2 | from .default import update_config
--------------------------------------------------------------------------------
/lib/models/networks/config/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/config/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/config/__pycache__/default.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/config/__pycache__/default.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/models/networks/config/default.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 |
7 | from yacs.config import CfgNode as CN
8 |
9 |
10 | _C = CN()
11 |
12 | _C.OUTPUT_DIR = ''
13 | _C.LOG_DIR = ''
14 | _C.DATA_DIR = ''
15 | _C.GPUS = (0,)
16 | _C.WORKERS = 4
17 | _C.PRINT_FREQ = 20
18 | _C.AUTO_RESUME = False
19 | _C.PIN_MEMORY = True
20 | _C.RANK = 0
21 |
22 | # Cudnn related params
23 | _C.CUDNN = CN()
24 | _C.CUDNN.BENCHMARK = True
25 | _C.CUDNN.DETERMINISTIC = False
26 | _C.CUDNN.ENABLED = True
27 |
28 | # common params for NETWORK
29 | _C.MODEL = CN()
30 | _C.MODEL.NAME = 'pose_hrnet'
31 | _C.MODEL.INIT_WEIGHTS = True
32 | _C.MODEL.PRETRAINED = ''
33 | _C.MODEL.NUM_JOINTS = 17
34 | _C.MODEL.TAG_PER_JOINT = True
35 | _C.MODEL.TARGET_TYPE = 'gaussian'
36 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256
37 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32
38 | _C.MODEL.SIGMA = 2
39 | _C.MODEL.EXTRA = CN(new_allowed=True)
40 |
41 | _C.LOSS = CN()
42 | _C.LOSS.USE_OHKM = False
43 | _C.LOSS.TOPK = 8
44 | _C.LOSS.USE_TARGET_WEIGHT = True
45 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False
46 |
47 | # DATASET related params
48 | _C.DATASET = CN()
49 | _C.DATASET.ROOT = ''
50 | _C.DATASET.DATASET = 'mpii'
51 | _C.DATASET.TRAIN_SET = 'train'
52 | _C.DATASET.TEST_SET = 'valid'
53 | _C.DATASET.DATA_FORMAT = 'jpg'
54 | _C.DATASET.HYBRID_JOINTS_TYPE = ''
55 | _C.DATASET.SELECT_DATA = False
56 |
57 | # training data augmentation
58 | _C.DATASET.FLIP = True
59 | _C.DATASET.SCALE_FACTOR = 0.25
60 | _C.DATASET.ROT_FACTOR = 30
61 | _C.DATASET.PROB_HALF_BODY = 0.0
62 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8
63 | _C.DATASET.COLOR_RGB = False
64 |
65 | # train
66 | _C.TRAIN = CN()
67 |
68 | _C.TRAIN.LR_FACTOR = 0.1
69 | _C.TRAIN.LR_STEP = [90, 110]
70 | _C.TRAIN.LR = 0.001
71 |
72 | _C.TRAIN.OPTIMIZER = 'adam'
73 | _C.TRAIN.MOMENTUM = 0.9
74 | _C.TRAIN.WD = 0.0001
75 | _C.TRAIN.NESTEROV = False
76 | _C.TRAIN.GAMMA1 = 0.99
77 | _C.TRAIN.GAMMA2 = 0.0
78 |
79 | _C.TRAIN.BEGIN_EPOCH = 0
80 | _C.TRAIN.END_EPOCH = 140
81 |
82 | _C.TRAIN.RESUME = False
83 | _C.TRAIN.CHECKPOINT = ''
84 |
85 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32
86 | _C.TRAIN.SHUFFLE = True
87 |
88 | # testing
89 | _C.TEST = CN()
90 |
91 | # size of images for each device
92 | _C.TEST.BATCH_SIZE_PER_GPU = 32
93 | # Test Model Epoch
94 | _C.TEST.FLIP_TEST = False
95 | _C.TEST.POST_PROCESS = False
96 | _C.TEST.SHIFT_HEATMAP = False
97 |
98 | _C.TEST.USE_GT_BBOX = False
99 |
100 | # nms
101 | _C.TEST.IMAGE_THRE = 0.1
102 | _C.TEST.NMS_THRE = 0.6
103 | _C.TEST.SOFT_NMS = False
104 | _C.TEST.OKS_THRE = 0.5
105 | _C.TEST.IN_VIS_THRE = 0.0
106 | _C.TEST.COCO_BBOX_FILE = ''
107 | _C.TEST.BBOX_THRE = 1.0
108 | _C.TEST.MODEL_FILE = ''
109 |
110 | # debug
111 | _C.DEBUG = CN()
112 | _C.DEBUG.DEBUG = False
113 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False
114 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False
115 | _C.DEBUG.SAVE_HEATMAPS_GT = False
116 | _C.DEBUG.SAVE_HEATMAPS_PRED = False
117 |
118 |
119 | def update_config(cfg, cfg_dir):
120 | cfg.defrost()
121 | cfg.merge_from_file(cfg_dir)
122 | cfg.freeze()
123 |
124 |
125 | if __name__ == '__main__':
126 | import sys
127 | with open(sys.argv[1], 'w') as f:
128 | print(_C, file=f)
--------------------------------------------------------------------------------
/lib/models/networks/config/hrnet_w18.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: true
2 | CUDNN:
3 | BENCHMARK: true
4 | DETERMINISTIC: false
5 | ENABLED: true
6 | DATA_DIR: ''
7 | GPUS: (0,1,2,3)
8 | OUTPUT_DIR: 'output'
9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 |
13 | DATASET:
14 | COLOR_RGB: true
15 | DATASET: 'coco'
16 | DATA_FORMAT: jpg
17 | FLIP: true
18 | NUM_JOINTS_HALF_BODY: 8
19 | PROB_HALF_BODY: 0.3
20 | ROOT: 'data/coco/'
21 | ROT_FACTOR: 45
22 | SCALE_FACTOR: 0.35
23 | TEST_SET: 'val2017'
24 | TRAIN_SET: 'train2017'
25 | MODEL:
26 | INIT_WEIGHTS: true
27 | NAME: pose_hrnet
28 | NUM_JOINTS: 17
29 | PRETRAINED: '../models/hrnetv2_w18_imagenet_pretrained.pth'
30 | TARGET_TYPE: gaussian
31 | IMAGE_SIZE:
32 | - 192
33 | - 256
34 | HEATMAP_SIZE:
35 | - 48
36 | - 64
37 | SIGMA: 2
38 | EXTRA:
39 | PRETRAINED_LAYERS:
40 | - 'conv1'
41 | - 'bn1'
42 | - 'conv2'
43 | - 'bn2'
44 | - 'layer1'
45 | - 'transition1'
46 | - 'stage2'
47 | - 'transition2'
48 | - 'stage3'
49 | - 'transition3'
50 | - 'stage4'
51 | FINAL_CONV_KERNEL: 1
52 | STAGE2:
53 | NUM_MODULES: 1
54 | NUM_BRANCHES: 2
55 | BLOCK: BASIC
56 | NUM_BLOCKS:
57 | - 4
58 | - 4
59 | NUM_CHANNELS:
60 | - 18
61 | - 36
62 | FUSE_METHOD: SUM
63 | STAGE3:
64 | NUM_MODULES: 4
65 | NUM_BRANCHES: 3
66 | BLOCK: BASIC
67 | NUM_BLOCKS:
68 | - 4
69 | - 4
70 | - 4
71 | NUM_CHANNELS:
72 | - 18
73 | - 36
74 | - 72
75 | FUSE_METHOD: SUM
76 | STAGE4:
77 | NUM_MODULES: 3
78 | NUM_BRANCHES: 4
79 | BLOCK: BASIC
80 | NUM_BLOCKS:
81 | - 4
82 | - 4
83 | - 4
84 | - 4
85 | NUM_CHANNELS:
86 | - 18
87 | - 36
88 | - 72
89 | - 144
90 | FUSE_METHOD: SUM
91 | LOSS:
92 | USE_TARGET_WEIGHT: true
93 | TRAIN:
94 | BATCH_SIZE_PER_GPU: 32
95 | SHUFFLE: true
96 | BEGIN_EPOCH: 0
97 | END_EPOCH: 210
98 | OPTIMIZER: adam
99 | LR: 0.001
100 | LR_FACTOR: 0.1
101 | LR_STEP:
102 | - 170
103 | - 200
104 | WD: 0.0001
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 | MOMENTUM: 0.9
108 | NESTEROV: false
109 | TEST:
110 | BATCH_SIZE_PER_GPU: 32
111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 | BBOX_THRE: 1.0
113 | IMAGE_THRE: 0.0
114 | IN_VIS_THRE: 0.2
115 | MODEL_FILE: ''
116 | NMS_THRE: 1.0
117 | OKS_THRE: 0.9
118 | USE_GT_BBOX: true
119 | FLIP_TEST: true
120 | POST_PROCESS: true
121 | SHIFT_HEATMAP: true
122 | DEBUG:
123 | DEBUG: true
124 | SAVE_BATCH_IMAGES_GT: true
125 | SAVE_BATCH_IMAGES_PRED: true
126 | SAVE_HEATMAPS_GT: true
127 | SAVE_HEATMAPS_PRED: true
--------------------------------------------------------------------------------
/lib/models/networks/config/hrnet_w32.yaml:
--------------------------------------------------------------------------------
1 | AUTO_RESUME: true
2 | CUDNN:
3 | BENCHMARK: true
4 | DETERMINISTIC: false
5 | ENABLED: true
6 | DATA_DIR: ''
7 | GPUS: (0,1,2,3)
8 | OUTPUT_DIR: 'output'
9 | LOG_DIR: 'log'
10 | WORKERS: 24
11 | PRINT_FREQ: 100
12 |
13 | DATASET:
14 | COLOR_RGB: true
15 | DATASET: 'coco'
16 | DATA_FORMAT: jpg
17 | FLIP: true
18 | NUM_JOINTS_HALF_BODY: 8
19 | PROB_HALF_BODY: 0.3
20 | ROOT: 'data/coco/'
21 | ROT_FACTOR: 45
22 | SCALE_FACTOR: 0.35
23 | TEST_SET: 'val2017'
24 | TRAIN_SET: 'train2017'
25 | MODEL:
26 | INIT_WEIGHTS: true
27 | NAME: pose_hrnet
28 | NUM_JOINTS: 17
29 | PRETRAINED: '../models/hrnetv2_w32_imagenet_pretrained.pth'
30 | TARGET_TYPE: gaussian
31 | IMAGE_SIZE:
32 | - 192
33 | - 256
34 | HEATMAP_SIZE:
35 | - 48
36 | - 64
37 | SIGMA: 2
38 | EXTRA:
39 | PRETRAINED_LAYERS:
40 | - 'conv1'
41 | - 'bn1'
42 | - 'conv2'
43 | - 'bn2'
44 | - 'layer1'
45 | - 'transition1'
46 | - 'stage2'
47 | - 'transition2'
48 | - 'stage3'
49 | - 'transition3'
50 | - 'stage4'
51 | FINAL_CONV_KERNEL: 1
52 | STAGE2:
53 | NUM_MODULES: 1
54 | NUM_BRANCHES: 2
55 | BLOCK: BASIC
56 | NUM_BLOCKS:
57 | - 4
58 | - 4
59 | NUM_CHANNELS:
60 | - 32
61 | - 64
62 | FUSE_METHOD: SUM
63 | STAGE3:
64 | NUM_MODULES: 4
65 | NUM_BRANCHES: 3
66 | BLOCK: BASIC
67 | NUM_BLOCKS:
68 | - 4
69 | - 4
70 | - 4
71 | NUM_CHANNELS:
72 | - 32
73 | - 64
74 | - 128
75 | FUSE_METHOD: SUM
76 | STAGE4:
77 | NUM_MODULES: 3
78 | NUM_BRANCHES: 4
79 | BLOCK: BASIC
80 | NUM_BLOCKS:
81 | - 4
82 | - 4
83 | - 4
84 | - 4
85 | NUM_CHANNELS:
86 | - 32
87 | - 64
88 | - 128
89 | - 256
90 | FUSE_METHOD: SUM
91 | LOSS:
92 | USE_TARGET_WEIGHT: true
93 | TRAIN:
94 | BATCH_SIZE_PER_GPU: 32
95 | SHUFFLE: true
96 | BEGIN_EPOCH: 0
97 | END_EPOCH: 210
98 | OPTIMIZER: adam
99 | LR: 0.001
100 | LR_FACTOR: 0.1
101 | LR_STEP:
102 | - 170
103 | - 200
104 | WD: 0.0001
105 | GAMMA1: 0.99
106 | GAMMA2: 0.0
107 | MOMENTUM: 0.9
108 | NESTEROV: false
109 | TEST:
110 | BATCH_SIZE_PER_GPU: 32
111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
112 | BBOX_THRE: 1.0
113 | IMAGE_THRE: 0.0
114 | IN_VIS_THRE: 0.2
115 | MODEL_FILE: ''
116 | NMS_THRE: 1.0
117 | OKS_THRE: 0.9
118 | USE_GT_BBOX: true
119 | FLIP_TEST: true
120 | POST_PROCESS: true
121 | SHIFT_HEATMAP: true
122 | DEBUG:
123 | DEBUG: true
124 | SAVE_BATCH_IMAGES_GT: true
125 | SAVE_BATCH_IMAGES_PRED: true
126 | SAVE_HEATMAPS_GT: true
127 | SAVE_HEATMAPS_PRED: true
--------------------------------------------------------------------------------
/lib/models/networks/config/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 80 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 |
7 | # YOLOv5 backbone
8 | backbone:
9 | # [from, number, module, args]
10 | [[-1, 1, Focus, [64, 3]], # 0-P1/2
11 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4
12 | [-1, 3, C3, [128]],
13 | [-1, 1, Conv, [256, 3, 2]], # 3-P3/8
14 | [-1, 9, C3, [256]],
15 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16
16 | [-1, 9, C3, [512]],
17 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
18 | [-1, 1, SPP, [1024, [5, 9, 13]]],
19 | [-1, 3, C3, [1024, False]], # 9
20 |
21 | [ -1, 1, Conv, [ 512, 1, 1 ] ], # 10 upsample /16
22 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
23 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
24 | [ -1, 3, C3, [ 512, False ] ], # 13
25 |
26 | [ -1, 1, Conv, [ 256, 1, 1 ] ], # 14 upsample /8
27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
28 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
29 | [ -1, 3, C3, [ 256, False ] ], # 17
30 |
31 | [ -1, 1, Conv, [ 128, 1, 1 ] ], # 18 upsample /4
32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
33 | [ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
34 | [ -1, 3, C3, [ 128, False ] ] # 21
35 | ]
36 |
37 |
--------------------------------------------------------------------------------
/lib/models/scatter_gather.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.nn.parallel._functions import Scatter, Gather
4 |
5 |
6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None):
7 | r"""
8 | Slices variables into approximately equal chunks and
9 | distributes them across given GPUs. Duplicates
10 | references to objects that are not variables. Does not
11 | support Tensors.
12 | """
13 | def scatter_map(obj):
14 | if isinstance(obj, Variable):
15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter."
17 | if isinstance(obj, tuple):
18 | return list(zip(*map(scatter_map, obj)))
19 | if isinstance(obj, list):
20 | return list(map(list, zip(*map(scatter_map, obj))))
21 | if isinstance(obj, dict):
22 | return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
23 | return [obj for _ in target_gpus]
24 |
25 | return scatter_map(inputs)
26 |
27 |
28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
29 | r"""Scatter with support for kwargs dictionary"""
30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
32 | if len(inputs) < len(kwargs):
33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
34 | elif len(kwargs) < len(inputs):
35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
36 | inputs = tuple(inputs)
37 | kwargs = tuple(kwargs)
38 | return inputs, kwargs
39 |
--------------------------------------------------------------------------------
/lib/models/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | def _sigmoid(x):
9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
10 | return y
11 |
12 | def _gather_feat(feat, ind, mask=None):
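  | # Gather the feature vectors at the flattened spatial indices `ind` (shape B x K),
  | # optionally keeping only the entries selected by the boolean `mask`.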
13 | dim = feat.size(2)
14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
15 | feat = feat.gather(1, ind)
16 | if mask is not None:
17 | mask = mask.unsqueeze(2).expand_as(feat)
18 | feat = feat[mask]
19 | feat = feat.view(-1, dim)
20 | return feat
21 |
22 | def _tranpose_and_gather_feat(feat, ind):
23 | feat = feat.permute(0, 2, 3, 1).contiguous()
24 | feat = feat.view(feat.size(0), -1, feat.size(3))
25 | feat = _gather_feat(feat, ind)
26 | return feat
27 |
28 | def flip_tensor(x):
29 | return torch.flip(x, [3])
30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy()
31 | # return torch.from_numpy(tmp).to(x.device)
32 |
33 | def flip_lr(x, flip_idx):
34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
35 | shape = tmp.shape
36 | for e in flip_idx:
37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
40 |
41 | def flip_lr_off(x, flip_idx):
42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
43 | shape = tmp.shape
44 | tmp = tmp.reshape(tmp.shape[0], 17, 2,
45 | tmp.shape[2], tmp.shape[3])
46 | tmp[:, :, 0, :, :] *= -1
47 | for e in flip_idx:
48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
--------------------------------------------------------------------------------
/lib/models/yolo.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | import sys
4 | import os
5 | from copy import deepcopy
6 | from pathlib import Path
7 |
8 | from .common import *
9 |
10 |
11 | class Detect(nn.Module):
12 | stride = None # strides computed during build
13 | onnx_dynamic = False # ONNX export parameter
14 |
15 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer
16 | super(Detect, self).__init__()
17 | self.nc = nc # number of classes
18 | self.no = nc + 5 # number of outputs per anchor
19 | self.nl = len(anchors) # number of detection layers
20 | self.na = len(anchors[0]) // 2 # number of anchors
21 | self.grid = [torch.zeros(1)] * self.nl # init grid
22 | a = torch.tensor(anchors).float().view(self.nl, -1, 2)
23 | self.register_buffer('anchors', a) # shape(nl,na,2)
24 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2)
25 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
26 | self.inplace = inplace # use in-place ops (e.g. slice assignment)
27 |
28 | def forward(self, x):
29 | # x = x.copy() # for profiling
30 | z = [] # inference output
31 | for i in range(self.nl):
32 | x[i] = self.m[i](x[i]) # conv
33 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85)
34 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
35 |
36 | if not self.training: # inference
37 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
38 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
39 |
40 | y = x[i].sigmoid()
41 | if self.inplace:
42 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
43 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
44 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
45 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
46 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh
47 | y = torch.cat((xy, wh, y[..., 4:]), -1)
48 | z.append(y.view(bs, -1, self.no))
49 |
50 | return x if self.training else (torch.cat(z, 1), x)
51 |
52 | @staticmethod
53 | def _make_grid(nx=20, ny=20):
54 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
55 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
56 |
57 |
58 | def fill_fc_weights(layers):
59 | for m in layers.modules():
60 | if isinstance(m, nn.Conv2d):
61 | if m.bias is not None:
62 | nn.init.constant_(m.bias, 0)
63 |
64 |
65 | class Model(nn.Module):
66 | def __init__(self, config='config/yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes
67 | super(Model, self).__init__()
68 | print(config)
69 | if isinstance(config, dict):
70 | self.yaml = config # model dict
71 | else: # is *.yaml
72 | import yaml # for torch hub
73 | self.yaml_file = Path(config).name
74 | with open(config) as f:
75 | self.yaml = yaml.safe_load(f) # model dict
76 |
77 | # Define model
78 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
79 | if nc and nc != self.yaml['nc']:
80 | self.yaml['nc'] = nc # override yaml value
81 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
82 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names
83 | self.inplace = self.yaml.get('inplace', True)
84 | # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
85 |
86 | def forward(self, x, augment=False, profile=False):
87 | return self.forward_once(x, profile) # single-scale inference, train
88 |
89 | def forward_once(self, x, profile=False):
90 | y, dt = [], [] # outputs
91 | for m in self.model:
92 | if m.f != -1: # if not from previous layer
93 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
94 |
95 | x = m(x) # run
96 | y.append(x if m.i in self.save else None) # save output
97 |
98 | return x
99 |
100 |
101 | def parse_model(d, ch): # model_dict, input_channels(3)
102 | nc, gd, gw = d['nc'], d['depth_multiple'], d['width_multiple']
103 |
104 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out
105 | for i, (f, n, m, args) in enumerate(d['backbone']): # from, number, module, args
106 | m = eval(m) if isinstance(m, str) else m # eval strings
107 | for j, a in enumerate(args):
108 | try:
109 | args[j] = eval(a) if isinstance(a, str) else a # eval strings
110 | except:
111 | pass
112 |
113 | n = max(round(n * gd), 1) if n > 1 else n # depth gain
114 | if m in [Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, C3TR, DeConv, DeConvDCN]:
115 | c1, c2 = ch[f], args[0]
116 | c2 = make_divisible(c2 * gw, 8)
117 |
118 | args = [c1, c2, *args[1:]]
119 | if m in [BottleneckCSP, C3, C3TR]:
120 | args.insert(2, n) # number of repeats
121 | n = 1
122 | elif m is nn.BatchNorm2d:
123 | args = [ch[f]]
124 | elif m is Concat:
125 | c2 = sum([ch[x] for x in f])
126 | elif m is Contract:
127 | c2 = ch[f] * args[0] ** 2
128 | elif m is Expand:
129 | c2 = ch[f] // args[0] ** 2
130 | else:
131 | c2 = ch[f]
132 |
133 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
134 | t = str(m)[8:-2].replace('__main__.', '') # module type
135 | np = sum([x.numel() for x in m_.parameters()]) # number params
136 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params
137 | save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
138 | layers.append(m_)
139 | if i == 0:
140 | ch = []
141 | ch.append(c2)
142 | return nn.Sequential(*layers), sorted(save)
143 |
144 |
145 | class PoseYOLOv5s(nn.Module):
146 | def __init__(self, heads, config_file):
147 | self.heads = heads
148 | super(PoseYOLOv5s, self).__init__()
149 | self.backbone = Model(config_file)
150 | for head in sorted(self.heads):
151 | num_output = self.heads[head]
152 | fc = nn.Sequential(
153 | nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=True),
154 | nn.SiLU(),
155 | nn.Conv2d(64, num_output, kernel_size=1, stride=1, padding=0))
156 | self.__setattr__(head, fc)
157 | if 'hm' in head:
158 | fc[-1].bias.data.fill_(-2.19)
159 | else:
160 | fill_fc_weights(fc)
161 |
162 | def forward(self, x):
163 | x = self.backbone(x)
164 | ret = {}
165 | for head in self.heads:
166 | ret[head] = self.__getattr__(head)(x)
167 | return [ret]
168 |
169 |
170 | def get_pose_net(num_layers, heads, head_conv):
171 | config_file = '../src/lib/models/networks/config/yolov5s.yaml'
172 | pretrained = '../models/yolov5s.pt'
173 | model = PoseYOLOv5s(heads, config_file)
174 | initialize_weights(model, pretrained)
175 | return model
176 |
177 |
178 | def intersect_dicts(da, db, exclude=()):
179 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
180 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
181 |
182 |
183 | def initialize_weights(model, pretrained=''):
184 | for i, m in enumerate(model.modules()):
185 | t = type(m)
186 | if t is nn.Conv2d:
187 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
188 | elif t is nn.BatchNorm2d:
189 | m.eps = 1e-3
190 | m.momentum = 0.03
191 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
192 | m.inplace = True
193 |
194 | for head in model.heads:
195 | final_layer = model.__getattr__(head)
196 | for i, m in enumerate(final_layer.modules()):
197 | if isinstance(m, nn.Conv2d):
198 | if m.weight.shape[0] == model.heads[head]:
199 | if 'hm' in head:
200 | nn.init.constant_(m.bias, -2.19)
201 | else:
202 | nn.init.normal_(m.weight, std=0.001)
203 | nn.init.constant_(m.bias, 0)
204 |
205 | if os.path.isfile(pretrained):
206 | ckpt = torch.load(pretrained) # load checkpoint
207 | state_dict = ckpt['model'].float().state_dict() # to FP32
208 | state_dict = intersect_dicts(state_dict, model.backbone.state_dict()) # intersect
209 | model.backbone.load_state_dict(state_dict, strict=False) # load
210 | print('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), pretrained)) # report
211 |
212 |
213 | def make_divisible(x, divisor):
214 | # Returns x evenly divisible by divisor
215 | return math.ceil(x / divisor) * divisor
--------------------------------------------------------------------------------
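An illustrative sketch (not part of the repository) of building the YOLOv5s-based network directly. The head sizes below are placeholders in the FairMOT style (heatmap, box size, offset, re-ID embedding), the config path mirrors the one hard-coded in get_pose_net and may need adjusting to your layout, and the 608x1088 input size is only an assumption:

import torch
from lib.models.yolo import PoseYOLOv5s

heads = {'hm': 1, 'wh': 4, 'reg': 2, 'id': 128}   # hypothetical head sizes
model = PoseYOLOv5s(heads, 'lib/models/networks/config/yolov5s.yaml')
model.eval()
with torch.no_grad():
    out = model(torch.zeros(1, 3, 608, 1088))[0]  # forward returns a one-element list of head dicts
print({k: tuple(v.shape) for k, v in out.items()})
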
/lib/tracker/__pycache__/basetrack.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/basetrack.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracker/__pycache__/matching.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/matching.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracker/__pycache__/multitracker.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/multitracker.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracker/basetrack.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from collections import OrderedDict
3 |
4 |
5 | class TrackState(object):
6 | New = 0
7 | Tracked = 1
8 | Lost = 2
9 | Removed = 3
10 |
11 |
12 | class BaseTrack(object):
13 | _count = 0
14 |
15 | track_id = 0
16 | is_activated = False
17 | state = TrackState.New
18 |
19 | history = OrderedDict()
20 | features = []
21 | curr_feature = None
22 | score = 0
23 | start_frame = 0
24 | frame_id = 0
25 | time_since_update = 0
26 |
27 | # multi-camera
28 | location = (np.inf, np.inf)
29 |
30 | @property
31 | def end_frame(self):
32 | return self.frame_id
33 |
34 | @staticmethod
35 | def next_id():
36 | BaseTrack._count += 1
37 | return BaseTrack._count
38 |
39 | def activate(self, *args):
40 | raise NotImplementedError
41 |
42 | def predict(self):
43 | raise NotImplementedError
44 |
45 | def update(self, *args, **kwargs):
46 | raise NotImplementedError
47 |
48 | def mark_lost(self):
49 | self.state = TrackState.Lost
50 |
51 | def mark_removed(self):
52 | self.state = TrackState.Removed
--------------------------------------------------------------------------------
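A toy subclass (not part of the repository) showing the contract BaseTrack defines; the concrete implementation used by the tracker is STrack in lib/tracker/multitracker.py:

from lib.tracker.basetrack import BaseTrack, TrackState

class ToyTrack(BaseTrack):
    def activate(self, frame_id):
        self.track_id = self.next_id()        # shared, monotonically increasing counter
        self.state = TrackState.Tracked
        self.is_activated = True
        self.frame_id = self.start_frame = frame_id

    def predict(self):
        pass                                   # a real track would propagate its Kalman state here

    def update(self, frame_id):
        self.frame_id = frame_id

t = ToyTrack()
t.activate(frame_id=1)
t.mark_lost()
print(t.track_id, t.state == TrackState.Lost)  # 1 True
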
/lib/tracker/matching.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import scipy
4 | import lap
5 | from scipy.spatial.distance import cdist
6 |
7 | from cython_bbox import bbox_overlaps as bbox_ious
8 | from lib.tracking_utils import kalman_filter
9 | import time
10 |
11 | def merge_matches(m1, m2, shape):
12 | O,P,Q = shape
13 | m1 = np.asarray(m1)
14 | m2 = np.asarray(m2)
15 |
16 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
18 |
19 | mask = M1*M2
20 | match = mask.nonzero()
21 | match = list(zip(match[0], match[1]))
22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
24 |
25 | return match, unmatched_O, unmatched_Q
26 |
27 |
28 | def _indices_to_matches(cost_matrix, indices, thresh):
29 | matched_cost = cost_matrix[tuple(zip(*indices))]
30 | matched_mask = (matched_cost <= thresh)
31 |
32 | matches = indices[matched_mask]
33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
35 |
36 | return matches, unmatched_a, unmatched_b
37 |
38 |
39 | def linear_assignment(cost_matrix, thresh):
40 | if cost_matrix.size == 0:
41 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))
42 | matches, unmatched_a, unmatched_b = [], [], []
43 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
44 | for ix, mx in enumerate(x):
45 | if mx >= 0:
46 | matches.append([ix, mx])
47 | unmatched_a = np.where(x < 0)[0]
48 | unmatched_b = np.where(y < 0)[0]
49 | matches = np.asarray(matches)
50 | return matches, unmatched_a, unmatched_b
51 |
52 |
53 | def ious(atlbrs, btlbrs):
54 | """
55 | Compute cost based on IoU
56 | :type atlbrs: list[tlbr] | np.ndarray
57 | :type btlbrs: list[tlbr] | np.ndarray
58 |
59 | :rtype ious np.ndarray
60 | """
61 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
62 | if ious.size == 0:
63 | return ious
64 |
65 | ious = bbox_ious(
66 | np.ascontiguousarray(atlbrs, dtype=float),
67 | np.ascontiguousarray(btlbrs, dtype=float)
68 | )
69 |
70 | return ious
71 |
72 |
73 | def iou_distance(atracks, btracks):
74 | """
75 | Compute cost based on IoU
76 | :type atracks: list[STrack]
77 | :type btracks: list[STrack]
78 |
79 | :rtype cost_matrix np.ndarray
80 | """
81 |
82 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
83 | atlbrs = atracks
84 | btlbrs = btracks
85 | else:
86 | atlbrs = [track.tlbr for track in atracks]
87 | btlbrs = [track.tlbr for track in btracks]
88 | _ious = ious(atlbrs, btlbrs)
89 | cost_matrix = 1 - _ious
90 |
91 | return cost_matrix
92 |
93 | def embedding_distance(tracks, detections, metric='cosine'):
94 | """
95 | :param tracks: list[STrack]
96 | :param detections: list[BaseTrack]
97 | :param metric:
98 | :return: cost_matrix np.ndarray
99 | """
100 |
101 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=float)
102 | if cost_matrix.size == 0:
103 | return cost_matrix
104 | det_features = np.asarray([track.curr_feat for track in detections], dtype=float)
105 | #for i, track in enumerate(tracks):
106 | #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
107 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=float)
108 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Normalized features
109 | return cost_matrix
110 |
111 |
112 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
113 | if cost_matrix.size == 0:
114 | return cost_matrix
115 | gating_dim = 2 if only_position else 4
116 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
117 | measurements = np.asarray([det.to_xyah() for det in detections])
118 | for row, track in enumerate(tracks):
119 | gating_distance = kf.gating_distance(
120 | track.mean, track.covariance, measurements, only_position)
121 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
122 | return cost_matrix
123 |
124 |
125 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
126 | if cost_matrix.size == 0:
127 | return cost_matrix
128 | gating_dim = 2 if only_position else 4
129 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
130 | measurements = np.asarray([det.to_xyah() for det in detections])
131 | for row, track in enumerate(tracks):
132 | gating_distance = kf.gating_distance(
133 | track.mean, track.covariance, measurements, only_position, metric='maha')
134 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
135 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
136 | return cost_matrix
137 |
--------------------------------------------------------------------------------
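A minimal association sketch (not part of the repository): iou_distance accepts plain tlbr arrays as well as track objects, and linear_assignment solves the thresholded assignment with lap.lapjv; cython_bbox and lap must be installed (both are listed in requirements.txt):

import numpy as np
from lib.tracker import matching

tracks = np.array([[10, 10, 50, 80], [200, 40, 240, 120]], dtype=float)  # predicted boxes, tlbr
dets = np.array([[12, 11, 52, 82], [400, 40, 440, 120]], dtype=float)    # detections, tlbr
cost = matching.iou_distance(tracks, dets)                 # 1 - IoU, shape (2, 2)
matches, u_track, u_det = matching.linear_assignment(cost, thresh=0.7)
print(matches)           # [[0 0]] -> track 0 matched to detection 0
print(u_track, u_det)    # [1] [1] -> track 1 and detection 1 left unmatched
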
/lib/tracking_utils/__pycache__/evaluation.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/evaluation.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/io.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/io.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/kalman_filter.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/kalman_filter.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/log.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/log.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/timer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/timer.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/__pycache__/visualization.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/visualization.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/tracking_utils/evaluation.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import copy
4 | import motmetrics as mm
5 | mm.lap.default_solver = 'lap'
6 |
7 | from lib.tracking_utils.io import read_results, unzip_objs
8 |
9 |
10 | class Evaluator(object):
11 |
12 | def __init__(self, data_root, seq_name, data_type):
13 | self.data_root = data_root
14 | self.seq_name = seq_name
15 | self.data_type = data_type
16 |
17 | self.load_annotations()
18 | self.reset_accumulator()
19 |
20 | def load_annotations(self):
21 | assert self.data_type == 'mot'
22 |
23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
24 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
26 |
27 | def reset_accumulator(self):
28 | self.acc = mm.MOTAccumulator(auto_id=True)
29 |
30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
31 | # results
32 | trk_tlwhs = np.copy(trk_tlwhs)
33 | trk_ids = np.copy(trk_ids)
34 |
35 | # gts
36 | gt_objs = self.gt_frame_dict.get(frame_id, [])
37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
38 |
39 | # ignore boxes
40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
41 | ignore_tlwhs = unzip_objs(ignore_objs)[0]
42 |
43 | # remove ignored results
44 | keep = np.ones(len(trk_tlwhs), dtype=bool)
45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
46 | if len(iou_distance) > 0:
47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
49 | match_ious = iou_distance[match_is, match_js]
50 |
51 | match_js = np.asarray(match_js, dtype=int)
52 | match_js = match_js[np.logical_not(np.isnan(match_ious))]
53 | keep[match_js] = False
54 | trk_tlwhs = trk_tlwhs[keep]
55 | trk_ids = trk_ids[keep]
56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
58 | #match_ious = iou_distance[match_is, match_js]
59 |
60 | #match_js = np.asarray(match_js, dtype=int)
61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))]
62 | #keep[match_js] = False
63 | #trk_tlwhs = trk_tlwhs[keep]
64 | #trk_ids = trk_ids[keep]
65 |
66 | # get distance matrix
67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
68 |
69 | # acc
70 | self.acc.update(gt_ids, trk_ids, iou_distance)
71 |
72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
74 | else:
75 | events = None
76 | return events
77 |
78 | def eval_file(self, filename):
79 | self.reset_accumulator()
80 |
81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False)
82 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
83 | frames = sorted(list(set(result_frame_dict.keys())))
84 | for frame_id in frames:
85 | trk_objs = result_frame_dict.get(frame_id, [])
86 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
87 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
88 |
89 | return self.acc
90 |
91 | @staticmethod
92 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
93 | names = copy.deepcopy(names)
94 | if metrics is None:
95 | metrics = mm.metrics.motchallenge_metrics
96 | metrics = copy.deepcopy(metrics)
97 |
98 | mh = mm.metrics.create()
99 | summary = mh.compute_many(
100 | accs,
101 | metrics=metrics,
102 | names=names,
103 | generate_overall=True
104 | )
105 |
106 | return summary
107 |
108 | @staticmethod
109 | def save_summary(summary, filename):
110 | import pandas as pd
111 | writer = pd.ExcelWriter(filename)
112 | summary.to_excel(writer)
113 | writer.close()
114 |
--------------------------------------------------------------------------------
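An illustrative evaluation sketch (not part of the repository); the paths and sequence name are placeholders, data_root must contain <seq>/gt/gt.txt, and the result files are in the MOT text format produced by lib/tracking_utils/io.py:

import motmetrics as mm
from lib.tracking_utils.evaluation import Evaluator

seqs = ['MOT17-02-SDP']                        # hypothetical sequence name
accs = [Evaluator('/data/MOT17/train', seq, 'mot').eval_file('results/%s.txt' % seq)
        for seq in seqs]
summary = Evaluator.get_summary(accs, seqs)
print(mm.io.render_summary(summary,
                           formatters=mm.metrics.create().formatters,
                           namemap=mm.io.motchallenge_metric_names))
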
/lib/tracking_utils/io.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Dict
3 | import numpy as np
4 |
5 | from lib.tracking_utils.log import logger
6 |
7 |
8 | def write_results(filename, results_dict: Dict, data_type: str):
9 | if not filename:
10 | return
11 | path = os.path.dirname(filename)
12 | if not os.path.exists(path):
13 | os.makedirs(path)
14 |
15 | if data_type in ('mot', 'mcmot', 'lab'):
16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
17 | elif data_type == 'kitti':
18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
19 | else:
20 | raise ValueError(data_type)
21 |
22 | with open(filename, 'w') as f:
23 | for frame_id, frame_data in results_dict.items():
24 | if data_type == 'kitti':
25 | frame_id -= 1
26 | for tlwh, track_id in frame_data:
27 | if track_id < 0:
28 | continue
29 | x1, y1, w, h = tlwh
30 | x2, y2 = x1 + w, y1 + h
31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
32 | f.write(line)
33 | logger.info('Save results to {}'.format(filename))
34 |
35 |
36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
37 | if data_type in ('mot', 'lab'):
38 | read_fun = read_mot_results
39 | else:
40 | raise ValueError('Unknown data type: {}'.format(data_type))
41 |
42 | return read_fun(filename, is_gt, is_ignore)
43 |
44 |
45 | """
46 | labels={'ped', ... % 1
47 | 'person_on_vhcl', ... % 2
48 | 'car', ... % 3
49 | 'bicycle', ... % 4
50 | 'mbike', ... % 5
51 | 'non_mot_vhcl', ... % 6
52 | 'static_person', ... % 7
53 | 'distractor', ... % 8
54 | 'occluder', ... % 9
55 | 'occluder_on_grnd', ... %10
56 | 'occluder_full', ... % 11
57 | 'reflection', ... % 12
58 | 'crowd' ... % 13
59 | };
60 | """
61 |
62 |
63 | def read_mot_results(filename, is_gt, is_ignore):
64 | valid_labels = {1}
65 | ignore_labels = {2, 7, 8, 12}
66 | results_dict = dict()
67 | if os.path.isfile(filename):
68 | with open(filename, 'r') as f:
69 | for line in f.readlines():
70 | linelist = line.split(',')
71 | if len(linelist) < 7:
72 | continue
73 | fid = int(linelist[0])
74 | if fid < 1:
75 | continue
76 | results_dict.setdefault(fid, list())
77 |
78 | box_size = float(linelist[4]) * float(linelist[5])
79 |
80 | if is_gt:
81 | if 'MOT16-' in filename or 'MOT17-' in filename:
82 | label = int(float(linelist[7]))
83 | mark = int(float(linelist[6]))
84 | if mark == 0 or label not in valid_labels:
85 | continue
86 | score = 1
87 | elif is_ignore:
88 | if 'MOT16-' in filename or 'MOT17-' in filename:
89 | label = int(float(linelist[7]))
90 | vis_ratio = float(linelist[8])
91 | if label not in ignore_labels and vis_ratio >= 0:
92 | continue
93 | else:
94 | continue
95 | score = 1
96 | else:
97 | score = float(linelist[6])
98 |
99 | #if box_size > 7000:
100 | #if box_size <= 7000 or box_size >= 15000:
101 | #if box_size < 15000:
102 | #continue
103 |
104 | tlwh = tuple(map(float, linelist[2:6]))
105 | target_id = int(linelist[1])
106 |
107 | results_dict[fid].append((tlwh, target_id, score))
108 |
109 | return results_dict
110 |
111 |
112 | def unzip_objs(objs):
113 | if len(objs) > 0:
114 | tlwhs, ids, scores = zip(*objs)
115 | else:
116 | tlwhs, ids, scores = [], [], []
117 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
118 |
119 | return tlwhs, ids, scores
--------------------------------------------------------------------------------
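A small round-trip sketch (not part of the repository) of the MOT text format handled above, where each result line is frame,id,x1,y1,w,h,1,-1,-1,-1; the /tmp path is only for illustration:

from lib.tracking_utils.io import write_results, read_results

results = {1: [((100.0, 50.0, 30.0, 80.0), 7)],   # frame 1: one tlwh box with track id 7
           2: [((102.0, 51.0, 30.0, 80.0), 7)]}
write_results('/tmp/demo_results.txt', results, 'mot')
parsed = read_results('/tmp/demo_results.txt', 'mot', is_gt=False)
print(parsed[1])   # [((100.0, 50.0, 30.0, 80.0), 7, 1.0)]
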
/lib/tracking_utils/log.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 |
4 | def get_logger(name='root'):
5 | formatter = logging.Formatter(
6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
8 |
9 | handler = logging.StreamHandler()
10 | handler.setFormatter(formatter)
11 |
12 | logger = logging.getLogger(name)
13 | logger.setLevel(logging.DEBUG)
14 | logger.addHandler(handler)
15 | return logger
16 |
17 |
18 | logger = get_logger('root')
19 |
--------------------------------------------------------------------------------
/lib/tracking_utils/nms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from ._utils import _C
3 | from lib.tracking_utils import _C
4 |
5 | nms = _C.nms
6 | # nms.__doc__ = """
7 | # This function performs Non-maximum suppression"""
8 |
--------------------------------------------------------------------------------
/lib/tracking_utils/parse_config.py:
--------------------------------------------------------------------------------
1 | def parse_model_cfg(path):
2 | """Parses the yolo-v3 layer configuration file and returns module definitions"""
3 | file = open(path, 'r')
4 | lines = file.read().split('\n')
5 | lines = [x for x in lines if x and not x.startswith('#')]
6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces
7 | module_defs = []
8 | for line in lines:
9 | if line.startswith('['): # This marks the start of a new block
10 | module_defs.append({})
11 | module_defs[-1]['type'] = line[1:-1].rstrip()
12 | if module_defs[-1]['type'] == 'convolutional':
13 | module_defs[-1]['batch_normalize'] = 0
14 | else:
15 | key, value = line.split("=")
16 | value = value.strip()
17 | module_defs[-1][key.rstrip()] = value.strip()
18 |
19 | return module_defs
20 |
21 |
22 | def parse_data_cfg(path):
23 | """Parses the data configuration file"""
24 | options = dict()
25 | options['gpus'] = '0'
26 | options['num_workers'] = '10'
27 | with open(path, 'r') as fp:
28 | lines = fp.readlines()
29 | for line in lines:
30 | line = line.strip()
31 | if line == '' or line.startswith('#'):
32 | continue
33 | key, value = line.split('=')
34 | options[key.strip()] = value.strip()
35 | return options
36 |
--------------------------------------------------------------------------------
/lib/tracking_utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 |
20 | self.duration = 0.
21 |
22 | def tic(self):
23 | # using time.time instead of time.clock because time.clock
24 | # does not normalize for multithreading
25 | self.start_time = time.time()
26 |
27 | def toc(self, average=True):
28 | self.diff = time.time() - self.start_time
29 | self.total_time += self.diff
30 | self.calls += 1
31 | self.average_time = self.total_time / self.calls
32 | if average:
33 | self.duration = self.average_time
34 | else:
35 | self.duration = self.diff
36 | return self.duration
37 |
38 | def clear(self):
39 | self.total_time = 0.
40 | self.calls = 0
41 | self.start_time = 0.
42 | self.diff = 0.
43 | self.average_time = 0.
44 | self.duration = 0.
45 |
46 |
--------------------------------------------------------------------------------
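A short sketch (not part of the repository) of the tic/toc pattern the trackers use to report average per-step time:

import time
from lib.tracking_utils.timer import Timer

timer = Timer()
for _ in range(5):
    timer.tic()
    time.sleep(0.01)                 # stand-in for one tracking step
    timer.toc()
print('average %.4fs per call, ~%.1f FPS' % (timer.average_time, 1. / max(timer.average_time, 1e-9)))
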
/lib/tracking_utils/visualization.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | def tlwhs_to_tlbrs(tlwhs):
6 | tlbrs = np.copy(tlwhs)
7 | if len(tlbrs) == 0:
8 | return tlbrs
9 | tlbrs[:, 2] += tlwhs[:, 0]
10 | tlbrs[:, 3] += tlwhs[:, 1]
11 | return tlbrs
12 |
13 |
14 | def get_color(idx):
15 | idx = idx * 3
16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
17 |
18 | return color
19 |
20 |
21 | def resize_image(image, max_size=800):
22 | if max(image.shape[:2]) > max_size:
23 | scale = float(max_size) / max(image.shape[:2])
24 | image = cv2.resize(image, None, fx=scale, fy=scale)
25 | return image
26 |
27 |
28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None):
29 | im = np.ascontiguousarray(np.copy(image))
30 | im_h, im_w = im.shape[:2]
31 |
32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
33 |
34 | text_scale = max(1, image.shape[1] / 1600.)
35 | text_thickness = 2
36 | line_thickness = max(1, int(image.shape[1] / 500.))
37 |
38 | radius = max(5, int(im_w/140.))
39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2)
41 |
42 | for i, tlwh in enumerate(tlwhs):
43 | x1, y1, w, h = tlwh
44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
45 | obj_id = int(obj_ids[i])
46 | id_text = '{}'.format(int(obj_id))
47 | if ids2 is not None:
48 | id_text = id_text + ', {}'.format(int(ids2[i]))
49 | _line_thickness = 1 if obj_id <= 0 else line_thickness
50 | color = get_color(abs(obj_id))
51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255),
53 | thickness=text_thickness)
54 | return im
55 |
56 |
57 | def plot_trajectory(image, tlwhs, track_ids):
58 | image = image.copy()
59 | for one_tlwhs, track_id in zip(tlwhs, track_ids):
60 | color = get_color(int(track_id))
61 | for tlwh in one_tlwhs:
62 | x1, y1, w, h = tuple(map(int, tlwh))
63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)
64 |
65 | return image
66 |
67 |
68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
69 | im = np.copy(image)
70 | text_scale = max(1, image.shape[1] / 800.)
71 | thickness = 2 if text_scale > 1.3 else 1
72 | for i, det in enumerate(tlbrs):
73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=int)
74 | if len(det) >= 7:
75 | label = 'det' if det[5] > 0 else 'trk'
76 | if ids is not None:
77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
79 | thickness=thickness)
80 | else:
81 | text = '{}# {:.2f}'.format(label, det[6])
82 |
83 | if scores is not None:
84 | text = '{:.2f}'.format(scores[i])
85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
86 | thickness=thickness)
87 |
88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
89 |
90 | return im
91 |
--------------------------------------------------------------------------------
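A drawing sketch (not part of the repository); the boxes and ids below are made up, but in the tracking scripts they come from the tracker's per-frame output, and the output path is only for illustration:

import numpy as np
import cv2
from lib.tracking_utils import visualization as vis

frame = np.zeros((608, 1088, 3), dtype=np.uint8)            # stand-in for a video frame
tlwhs = [(100, 80, 60, 160), (400, 120, 55, 150)]           # one (x1, y1, w, h) per target
obj_ids = [3, 8]
drawn = vis.plot_tracking(frame, tlwhs, obj_ids, frame_id=1, fps=25.0)
cv2.imwrite('/tmp/tracking_frame.jpg', drawn)
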
/lib/trains/base_trainer.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import time
6 | import torch
7 | from progress.bar import Bar
8 | from lib.models.data_parallel import DataParallel
9 | from lib.utils.utils import AverageMeter
10 |
11 |
12 | class ModleWithLoss(torch.nn.Module):
13 | def __init__(self, model, loss):
14 | super(ModleWithLoss, self).__init__()
15 | self.model = model
16 | self.loss = loss
17 |
18 | def forward(self, batch):
19 | outputs = self.model(batch['input'])
20 | loss, loss_stats = self.loss(outputs, batch)
21 | return outputs[-1], loss, loss_stats
22 |
23 | class BaseTrainer(object):
24 | def __init__(
25 | self, opt, model, optimizer=None):
26 | self.opt = opt
27 | self.optimizer = optimizer
28 | self.loss_stats, self.loss = self._get_losses(opt)
29 | self.model_with_loss = ModleWithLoss(model, self.loss)
30 | self.optimizer.add_param_group({'params': self.loss.parameters()})
31 |
32 | def set_device(self, gpus, chunk_sizes, device):
33 | if len(gpus) > 1:
34 | self.model_with_loss = DataParallel(
35 | self.model_with_loss, device_ids=gpus,
36 | chunk_sizes=chunk_sizes).to(device)
37 | else:
38 | self.model_with_loss = self.model_with_loss.to(device)
39 |
40 | for state in self.optimizer.state.values():
41 | for k, v in state.items():
42 | if isinstance(v, torch.Tensor):
43 | state[k] = v.to(device=device, non_blocking=True)
44 |
45 | def run_epoch(self, phase, epoch, data_loader):
46 | model_with_loss = self.model_with_loss
47 | if phase == 'train':
48 | model_with_loss.train()
49 | else:
50 | if len(self.opt.gpus) > 1:
51 | model_with_loss = self.model_with_loss.module
52 | model_with_loss.eval()
53 | torch.cuda.empty_cache()
54 |
55 | opt = self.opt
56 | results = {}
57 | data_time, batch_time = AverageMeter(), AverageMeter()
58 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
59 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
60 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
61 | end = time.time()
62 | for iter_id, batch in enumerate(data_loader):
63 | if iter_id >= num_iters:
64 | break
65 | data_time.update(time.time() - end)
66 |
67 | for k in batch:
68 | if k != 'meta':
69 | batch[k] = batch[k].to(device=opt.device, non_blocking=True)
70 |
71 | output, loss, loss_stats = model_with_loss(batch)
72 | loss = loss.mean()
73 | if phase == 'train':
74 | self.optimizer.zero_grad()
75 | loss.backward()
76 | self.optimizer.step()
77 | batch_time.update(time.time() - end)
78 | end = time.time()
79 |
80 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format(
81 | epoch, iter_id, num_iters, phase=phase,
82 | total=bar.elapsed_td, eta=bar.eta_td)
83 | for l in avg_loss_stats:
84 | avg_loss_stats[l].update(
85 | loss_stats[l].mean().item(), batch['input'].size(0))
86 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
87 | if not opt.hide_data_time:
88 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
89 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
90 | if opt.print_iter > 0:
91 | if iter_id % opt.print_iter == 0:
92 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
93 | else:
94 | bar.next()
95 |
96 | if opt.test:
97 | self.save_result(output, batch, results)
98 | del output, loss, loss_stats, batch
99 |
100 | bar.finish()
101 | ret = {k: v.avg for k, v in avg_loss_stats.items()}
102 | ret['time'] = bar.elapsed_td.total_seconds() / 60.
103 | return ret, results
104 |
105 |
106 | def debug(self, batch, output, iter_id):
107 | raise NotImplementedError
108 |
109 | def save_result(self, output, batch, results):
110 | raise NotImplementedError
111 |
112 | def _get_losses(self, opt):
113 | raise NotImplementedError
114 |
115 | def val(self, epoch, data_loader):
116 | return self.run_epoch('val', epoch, data_loader)
117 |
118 | def train(self, epoch, data_loader):
119 | return self.run_epoch('train', epoch, data_loader)
--------------------------------------------------------------------------------
/lib/trains/mot.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import math
6 | import torch
7 | import numpy as np
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | import torchvision
11 |
12 | from fvcore.nn import sigmoid_focal_loss_jit
13 |
14 | from lib.models.losses import FocalLoss, TripletLoss
15 | from lib.models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss
16 | from lib.models.decode import mot_decode
17 | from lib.models.utils import _sigmoid, _tranpose_and_gather_feat
18 | from lib.utils.post_process import ctdet_post_process
19 | from .base_trainer import BaseTrainer
20 |
21 |
22 | class MotLoss(torch.nn.Module):
23 | def __init__(self, opt):
24 | super(MotLoss, self).__init__()
25 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
26 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
27 | RegLoss() if opt.reg_loss == 'sl1' else None
28 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \
29 | NormRegL1Loss() if opt.norm_wh else \
30 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
31 | self.opt = opt
32 | self.emb_dim = opt.reid_dim
33 | self.nID = opt.nID
34 | self.classifier = nn.Linear(self.emb_dim, self.nID)
35 | if opt.id_loss == 'focal':
36 | torch.nn.init.normal_(self.classifier.weight, std=0.01)
37 | prior_prob = 0.01
38 | bias_value = -math.log((1 - prior_prob) / prior_prob)
39 | torch.nn.init.constant_(self.classifier.bias, bias_value)
40 | self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1)
41 | self.emb_scale = math.sqrt(2) * math.log(self.nID - 1)
42 | self.s_det = nn.Parameter(-1.85 * torch.ones(1))
43 | self.s_id = nn.Parameter(-1.05 * torch.ones(1))
44 |
45 | def forward(self, outputs, batch):
46 | opt = self.opt
47 | hm_loss, wh_loss, off_loss, id_loss = 0, 0, 0, 0
48 | for s in range(opt.num_stacks):
49 | output = outputs[s]
50 | if not opt.mse_loss:
51 | output['hm'] = _sigmoid(output['hm'])
52 |
53 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
54 | if opt.wh_weight > 0:
55 | wh_loss += self.crit_reg(
56 | output['wh'], batch['reg_mask'],
57 | batch['ind'], batch['wh']) / opt.num_stacks
58 |
59 | if opt.reg_offset and opt.off_weight > 0:
60 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'],
61 | batch['ind'], batch['reg']) / opt.num_stacks
62 |
63 | if opt.id_weight > 0:
64 | id_head = _tranpose_and_gather_feat(output['id'], batch['ind'])
65 | id_head = id_head[batch['reg_mask'] > 0].contiguous()
66 | id_head = self.emb_scale * F.normalize(id_head)
67 | id_target = batch['ids'][batch['reg_mask'] > 0]
68 |
69 | id_output = self.classifier(id_head).contiguous()
70 | if self.opt.id_loss == 'focal':
71 | id_target_one_hot = id_output.new_zeros((id_head.size(0), self.nID)).scatter_(1,
72 | id_target.long().view(
73 | -1, 1), 1)
74 | id_loss += sigmoid_focal_loss_jit(id_output, id_target_one_hot,
75 | alpha=0.25, gamma=2.0, reduction="sum"
76 | ) / id_output.size(0)
77 | else:
78 | id_loss += self.IDLoss(id_output, id_target)
79 |
80 | det_loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + opt.off_weight * off_loss
81 | if opt.multi_loss == 'uncertainty':
82 | loss = torch.exp(-self.s_det) * det_loss + torch.exp(-self.s_id) * id_loss + (self.s_det + self.s_id)
83 | loss *= 0.5
84 | else:
85 | loss = det_loss + 0.1 * id_loss
86 |
87 | loss_stats = {'loss': loss, 'hm_loss': hm_loss,
88 | 'wh_loss': wh_loss, 'off_loss': off_loss, 'id_loss': id_loss}
89 | return loss, loss_stats
90 |
91 |
92 | class MotTrainer(BaseTrainer):
93 | def __init__(self, opt, model, optimizer=None):
94 | super(MotTrainer, self).__init__(opt, model, optimizer=optimizer)
95 |
96 | def _get_losses(self, opt):
97 | loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss', 'id_loss']
98 | loss = MotLoss(opt)
99 | return loss_states, loss
100 |
101 | def save_result(self, output, batch, results):
102 | reg = output['reg'] if self.opt.reg_offset else None
103 | dets = mot_decode(
104 | output['hm'], output['wh'], reg=reg,
105 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
106 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
107 | dets_out = ctdet_post_process(
108 | dets.copy(), batch['meta']['c'].cpu().numpy(),
109 | batch['meta']['s'].cpu().numpy(),
110 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
111 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
112 |
--------------------------------------------------------------------------------
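A self-contained sketch (not part of the repository) of the 'uncertainty' branch above, which weights the detection and re-ID losses with two learned parameters s_det and s_id, i.e. loss = 0.5 * (exp(-s_det) * L_det + exp(-s_id) * L_id + s_det + s_id):

import torch

s_det = torch.nn.Parameter(-1.85 * torch.ones(1))          # initial values match MotLoss above
s_id = torch.nn.Parameter(-1.05 * torch.ones(1))
det_loss, id_loss = torch.tensor(2.0), torch.tensor(5.0)   # example loss values
loss = 0.5 * (torch.exp(-s_det) * det_loss + torch.exp(-s_id) * id_loss + (s_det + s_id))
print(loss.item())
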
/lib/trains/train_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .mot import MotTrainer
6 |
7 |
8 | train_factory = {
9 | 'mot': MotTrainer,
10 | }
11 |
--------------------------------------------------------------------------------
/lib/utils/__pycache__/image.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/image.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/utils/__pycache__/post_process.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/post_process.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/utils/__pycache__/utils.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/utils.cpython-38.pyc
--------------------------------------------------------------------------------
/lib/utils/image.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com)
5 | # Modified by Xingyi Zhou
6 | # ------------------------------------------------------------------------------
7 |
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | from __future__ import print_function
11 |
12 | import numpy as np
13 | import cv2
14 | import random
15 |
16 | def flip(img):
17 | return img[:, :, ::-1].copy()
18 |
19 | def transform_preds(coords, center, scale, output_size):
20 | target_coords = np.zeros(coords.shape)
21 | trans = get_affine_transform(center, scale, 0, output_size, inv=1)
22 | for p in range(coords.shape[0]):
23 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
24 | return target_coords
25 |
26 |
27 | def get_affine_transform(center,
28 | scale,
29 | rot,
30 | output_size,
31 | shift=np.array([0, 0], dtype=np.float32),
32 | inv=0):
33 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
34 | scale = np.array([scale, scale], dtype=np.float32)
35 |
36 | scale_tmp = scale
37 | src_w = scale_tmp[0]
38 | dst_w = output_size[0]
39 | dst_h = output_size[1]
40 |
41 | rot_rad = np.pi * rot / 180
42 | src_dir = get_dir([0, src_w * -0.5], rot_rad)
43 | dst_dir = np.array([0, dst_w * -0.5], np.float32)
44 |
45 | src = np.zeros((3, 2), dtype=np.float32)
46 | dst = np.zeros((3, 2), dtype=np.float32)
47 | src[0, :] = center + scale_tmp * shift
48 | src[1, :] = center + src_dir + scale_tmp * shift
49 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
50 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir
51 |
52 | src[2:, :] = get_3rd_point(src[0, :], src[1, :])
53 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
54 |
55 | if inv:
56 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
57 | else:
58 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
59 |
60 | return trans
61 |
62 |
63 | def affine_transform(pt, t):
64 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T
65 | new_pt = np.dot(t, new_pt)
66 | return new_pt[:2]
67 |
68 |
69 | def get_3rd_point(a, b):
70 | direct = a - b
71 | return b + np.array([-direct[1], direct[0]], dtype=np.float32)
72 |
73 |
74 | def get_dir(src_point, rot_rad):
75 | sn, cs = np.sin(rot_rad), np.cos(rot_rad)
76 |
77 | src_result = [0, 0]
78 | src_result[0] = src_point[0] * cs - src_point[1] * sn
79 | src_result[1] = src_point[0] * sn + src_point[1] * cs
80 |
81 | return src_result
82 |
83 |
84 | def crop(img, center, scale, output_size, rot=0):
85 | trans = get_affine_transform(center, scale, rot, output_size)
86 |
87 | dst_img = cv2.warpAffine(img,
88 | trans,
89 | (int(output_size[0]), int(output_size[1])),
90 | flags=cv2.INTER_LINEAR)
91 |
92 | return dst_img
93 |
94 |
95 | def gaussian_radius(det_size, min_overlap=0.7):
96 | height, width = det_size
97 |
98 | a1 = 1
99 | b1 = (height + width)
100 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
101 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
102 | r1 = (b1 + sq1) / 2
103 |
104 | a2 = 4
105 | b2 = 2 * (height + width)
106 | c2 = (1 - min_overlap) * width * height
107 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
108 | r2 = (b2 + sq2) / 2
109 |
110 | a3 = 4 * min_overlap
111 | b3 = -2 * min_overlap * (height + width)
112 | c3 = (min_overlap - 1) * width * height
113 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
114 | r3 = (b3 + sq3) / 2
115 | return min(r1, r2, r3)
116 |
117 |
118 | def gaussian2D(shape, sigma=1):
119 | m, n = [(ss - 1.) / 2. for ss in shape]
120 | y, x = np.ogrid[-m:m+1,-n:n+1]
121 |
122 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
123 | h[h < np.finfo(h.dtype).eps * h.max()] = 0
124 | return h
125 |
126 | def draw_umich_gaussian(heatmap, center, radius, k=1):
127 | diameter = 2 * radius + 1
128 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
129 |
130 | x, y = int(center[0]), int(center[1])
131 |
132 | height, width = heatmap.shape[0:2]
133 |
134 | left, right = min(x, radius), min(width - x, radius + 1)
135 | top, bottom = min(y, radius), min(height - y, radius + 1)
136 |
137 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
138 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
139 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
140 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
141 | return heatmap
142 |
143 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False):
144 | diameter = 2 * radius + 1
145 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6)
146 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1)
147 | dim = value.shape[0]
148 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value
149 | if is_offset and dim == 2:
150 | delta = np.arange(diameter*2+1) - radius
151 | reg[0] = reg[0] - delta.reshape(1, -1)
152 | reg[1] = reg[1] - delta.reshape(-1, 1)
153 |
154 | x, y = int(center[0]), int(center[1])
155 |
156 | height, width = heatmap.shape[0:2]
157 |
158 | left, right = min(x, radius), min(width - x, radius + 1)
159 | top, bottom = min(y, radius), min(height - y, radius + 1)
160 |
161 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
162 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right]
163 | masked_gaussian = gaussian[radius - top:radius + bottom,
164 | radius - left:radius + right]
165 | masked_reg = reg[:, radius - top:radius + bottom,
166 | radius - left:radius + right]
167 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug
168 | idx = (masked_gaussian >= masked_heatmap).reshape(
169 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1])
170 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg
171 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap
172 | return regmap
173 |
174 |
175 | def draw_msra_gaussian(heatmap, center, sigma):
176 | tmp_size = sigma * 3
177 | mu_x = int(center[0] + 0.5)
178 | mu_y = int(center[1] + 0.5)
179 | w, h = heatmap.shape[0], heatmap.shape[1]
180 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
181 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
182 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0:
183 | return heatmap
184 | size = 2 * tmp_size + 1
185 | x = np.arange(0, size, 1, np.float32)
186 | y = x[:, np.newaxis]
187 | x0 = y0 = size // 2
188 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
189 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0]
190 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1]
191 | img_x = max(0, ul[0]), min(br[0], h)
192 | img_y = max(0, ul[1]), min(br[1], w)
193 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum(
194 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]],
195 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]])
196 | return heatmap
197 |
198 | def grayscale(image):
199 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
200 |
201 | def lighting_(data_rng, image, alphastd, eigval, eigvec):
202 | alpha = data_rng.normal(scale=alphastd, size=(3, ))
203 | image += np.dot(eigvec, eigval * alpha)
204 |
205 | def blend_(alpha, image1, image2):
206 | image1 *= alpha
207 | image2 *= (1 - alpha)
208 | image1 += image2
209 |
210 | def saturation_(data_rng, image, gs, gs_mean, var):
211 | alpha = 1. + data_rng.uniform(low=-var, high=var)
212 | blend_(alpha, image, gs[:, :, None])
213 |
214 | def brightness_(data_rng, image, gs, gs_mean, var):
215 | alpha = 1. + data_rng.uniform(low=-var, high=var)
216 | image *= alpha
217 |
218 | def contrast_(data_rng, image, gs, gs_mean, var):
219 | alpha = 1. + data_rng.uniform(low=-var, high=var)
220 | blend_(alpha, image, gs_mean)
221 |
222 | def color_aug(data_rng, image, eig_val, eig_vec):
223 | functions = [brightness_, contrast_, saturation_]
224 | random.shuffle(functions)
225 |
226 | gs = grayscale(image)
227 | gs_mean = gs.mean()
228 | for f in functions:
229 | f(data_rng, image, gs, gs_mean, 0.4)
230 | lighting_(data_rng, image, 0.1, eig_val, eig_vec)
231 |
--------------------------------------------------------------------------------
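A heatmap-target sketch (not part of the repository): gaussian_radius picks a radius such that boxes overlapping the ground truth by at least min_overlap still cover the peak, and draw_umich_gaussian splats the Gaussian onto the map; the feature-map size and box below are arbitrary:

import numpy as np
from lib.utils.image import gaussian_radius, draw_umich_gaussian

heatmap = np.zeros((152, 272), dtype=np.float32)    # (H, W) of a stride-4 output for a 608x1088 input
radius = max(0, int(gaussian_radius((40, 24))))     # box height 40, width 24 on the feature map
draw_umich_gaussian(heatmap, center=(60, 30), radius=radius)
print(radius, heatmap.max())                        # peak of 1.0 at the object centre
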
/lib/utils/post_process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | from .image import transform_preds
7 |
8 |
9 | def ctdet_post_process(dets, c, s, h, w, num_classes):
10 | # dets: batch x max_dets x dim
11 | # return 1-based class det dict
12 | ret = []
13 | for i in range(dets.shape[0]):
14 | top_preds = {}
15 | dets[i, :, :2] = transform_preds(
16 | dets[i, :, 0:2], c[i], s[i], (w, h))
17 | dets[i, :, 2:4] = transform_preds(
18 | dets[i, :, 2:4], c[i], s[i], (w, h))
19 | classes = dets[i, :, -1]
20 | for j in range(num_classes):
21 | inds = (classes == j)
22 | top_preds[j + 1] = np.concatenate([
23 | dets[i, inds, :4].astype(np.float32),
24 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist()
25 | ret.append(top_preds)
26 | return ret
27 |
28 |
--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import numpy as np
7 |
8 | class AverageMeter(object):
9 | """Computes and stores the average and current value"""
10 | def __init__(self):
11 | self.reset()
12 |
13 | def reset(self):
14 | self.val = 0
15 | self.avg = 0
16 | self.sum = 0
17 | self.count = 0
18 |
19 | def update(self, val, n=1):
20 | self.val = val
21 | self.sum += val * n
22 | self.count += n
23 | if self.count > 0:
24 | self.avg = self.sum / self.count
25 |
26 |
27 | def xyxy2xywh(x):
28 | # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h]
29 | y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
30 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2
31 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2
32 | y[:, 2] = x[:, 2] - x[:, 0]
33 | y[:, 3] = x[:, 3] - x[:, 1]
34 | return y
35 |
36 |
37 | def xywh2xyxy(x):
38 | # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2]
39 | y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape)
40 | y[:, 0] = (x[:, 0] - x[:, 2] / 2)
41 | y[:, 1] = (x[:, 1] - x[:, 3] / 2)
42 | y[:, 2] = (x[:, 0] + x[:, 2] / 2)
43 | y[:, 3] = (x[:, 1] + x[:, 3] / 2)
44 | return y
45 |
46 | def ap_per_class(tp, conf, pred_cls, target_cls):
47 | """ Compute the average precision, given the recall and precision curves.
48 | Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics.
49 | # Arguments
50 | tp: True positives (list).
51 | conf: Objectness value from 0-1 (list).
52 | pred_cls: Predicted object classes (list).
53 | target_cls: True object classes (list).
54 | # Returns
55 | The average precision as computed in py-faster-rcnn.
56 | """
57 |
58 | # lists/pytorch to numpy
59 | tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls)
60 |
61 | # Sort by objectness
62 | i = np.argsort(-conf)
63 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
64 |
65 | # Find unique classes
66 | unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0))
67 |
68 | # Create Precision-Recall curve and compute AP for each class
69 | ap, p, r = [], [], []
70 | for c in unique_classes:
71 | i = pred_cls == c
72 | n_gt = sum(target_cls == c) # Number of ground truth objects
73 | n_p = sum(i) # Number of predicted objects
74 |
75 | if (n_p == 0) and (n_gt == 0):
76 | continue
77 | elif (n_p == 0) or (n_gt == 0):
78 | ap.append(0)
79 | r.append(0)
80 | p.append(0)
81 | else:
82 | # Accumulate FPs and TPs
83 | fpc = np.cumsum(1 - tp[i])
84 | tpc = np.cumsum(tp[i])
85 |
86 | # Recall
87 | recall_curve = tpc / (n_gt + 1e-16)
88 | r.append(tpc[-1] / (n_gt + 1e-16))
89 |
90 | # Precision
91 | precision_curve = tpc / (tpc + fpc)
92 | p.append(tpc[-1] / (tpc[-1] + fpc[-1]))
93 |
94 | # AP from recall-precision curve
95 | ap.append(compute_ap(recall_curve, precision_curve))
96 |
97 | return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p)
98 |
99 |
100 | def compute_ap(recall, precision):
101 | """ Compute the average precision, given the recall and precision curves.
102 | Code originally from https://github.com/rbgirshick/py-faster-rcnn.
103 | # Arguments
104 | recall: The recall curve (list).
105 | precision: The precision curve (list).
106 | # Returns
107 | The average precision as computed in py-faster-rcnn.
108 | """
109 | # correct AP calculation
110 | # first append sentinel values at the end
111 |
112 | mrec = np.concatenate(([0.], recall, [1.]))
113 | mpre = np.concatenate(([0.], precision, [0.]))
114 |
115 | # compute the precision envelope
116 | for i in range(mpre.size - 1, 0, -1):
117 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
118 |
119 | # to calculate area under PR curve, look for points
120 | # where X axis (recall) changes value
121 | i = np.where(mrec[1:] != mrec[:-1])[0]
122 |
123 | # and sum (\Delta recall) * prec
124 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
125 | return ap
126 |
127 |
128 | def bbox_iou(box1, box2, x1y1x2y2=False):
129 | """
130 | Returns the IoU of two bounding boxes
131 | """
132 | N, M = len(box1), len(box2)
133 | if x1y1x2y2:
134 | # Get the coordinates of bounding boxes
135 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
136 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
137 | else:
138 | # Transform from center and width to exact coordinates
139 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
140 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
141 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
142 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
143 |
144 | # get the coordinates of the intersection rectangle
145 | inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1)
146 | inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1)
147 | inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2)
148 | inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2)
149 | # Intersection area
150 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0)
151 | # Union Area
152 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
153 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1,1).expand(N,M)
154 | b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1,-1).expand(N,M)
155 |
156 | return inter_area / (b1_area + b2_area - inter_area + 1e-16)
157 |
158 |
159 | def generate_anchors(nGh, nGw, anchor_wh):
160 | nA = len(anchor_wh)
161 | yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw), indexing='ij')
162 |
163 | mesh = np.stack([xx, yy], axis=0) # Shape 2, nGh, nGw
164 | mesh = np.tile(np.expand_dims(mesh, axis=0), (nA, 1, 1, 1)) # Shape nA x 2 x nGh x nGw
165 | anchor_offset_mesh = np.tile(np.expand_dims(np.expand_dims(anchor_wh, -1), -1), (1, 1, nGh, nGw)) # Shape nA x 2 x nGh x nGw
166 | anchor_mesh = np.concatenate((mesh, anchor_offset_mesh), axis=1) # Shape nA x 4 x nGh x nGw
167 | return anchor_mesh
168 |
169 |
170 | def encode_delta(gt_box_list, fg_anchor_list):
171 | px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \
172 | fg_anchor_list[:, 2], fg_anchor_list[:,3]
173 | gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
174 | gt_box_list[:, 2], gt_box_list[:, 3]
175 | dx = (gx - px) / pw
176 | dy = (gy - py) / ph
177 | dw = np.log(gw/pw)
178 | dh = np.log(gh/ph)
179 | return np.stack((dx, dy, dw, dh), axis=1)
180 |
--------------------------------------------------------------------------------
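
A quick sanity check of compute_ap from the utility module above: on a tiny hand-made precision-recall curve the interpolated envelope can be verified by hand. The snippet below is only a sketch and assumes the lib package is importable from the repository root.

import numpy as np
from lib.utils.utils import compute_ap

# Two detections for one class with two ground-truth boxes:
# the first detection is a true positive, the second a false positive.
recall = np.array([0.5, 0.5])      # cumulative TP / n_gt
precision = np.array([1.0, 0.5])   # cumulative TP / (TP + FP)

# The precision envelope keeps 1.0 up to recall 0.5, so AP = 0.5 * 1.0 = 0.5
print(compute_ap(recall, precision))  # -> 0.5
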
/main.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from interface import VideoQt, QtCore, QApplication, QWidget
3 |
4 | if __name__ == "__main__":
5 | QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling)
6 | app = QApplication(sys.argv)
7 | widget = QWidget()
8 | widget.setStyleSheet('background-color:black;')
9 | widget.setWindowOpacity(0.8)
10 | video = VideoQt()
11 | video.setupUi(widget)
12 | widget.show()
13 | sys.exit(app.exec_())
14 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | yacs
2 | opencv-python
3 | PyYAML
4 | cython-bbox
5 | scipy
6 | progress
7 | motmetrics
8 | matplotlib
9 | lap
10 | openpyxl
11 | Pillow
12 | tensorboardX
13 | fvcore
--------------------------------------------------------------------------------
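
The list above covers the third-party Python packages used by the tracking code; note that torch and PyQt5 are imported throughout the project but are not pinned here, so they have to be installed separately to match the local CUDA and Qt setup. The rest installs with the usual `pip install -r requirements.txt`.
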
/src/VideoTimer.py:
--------------------------------------------------------------------------------
1 | import time
2 | from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker
3 |
4 | class Communicate(QObject):
5 | signal = pyqtSignal(str)
6 |
7 | class VideoTimer(QThread):
8 | def __init__(self, frequent=20):
9 | QThread.__init__(self)
10 | self.stopped = False
11 | self.frequent = frequent
12 | self.timeSignal = Communicate()
13 | self.mutex = QMutex()
14 |
15 | def run(self):
16 | with QMutexLocker(self.mutex):
17 | self.stopped = False
18 | while True:
19 | if self.stopped:
20 | return
21 | self.timeSignal.signal.emit("1")
22 | time.sleep(1 / self.frequent)
23 |
24 | def stop(self):
25 | with QMutexLocker(self.mutex):
26 | self.stopped = True
27 |
28 | def is_stopped(self):
29 | with QMutexLocker(self.mutex):
30 | return self.stopped
31 |
32 | def set_fps(self, fps):
33 | self.frequent = fps
--------------------------------------------------------------------------------
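
VideoTimer emits its string signal roughly `frequent` times per second until stop() is called, which is how the GUI refreshes frames at a steady rate. A minimal sketch of how such a timer can be wired up (the slot name and import path below are illustrative; the real wiring lives in interface.py, which is not shown here):

from PyQt5.QtWidgets import QApplication
from VideoTimer import VideoTimer   # adjust the import to the project layout

def on_tick(_msg):
    # grab and display the next frame here
    pass

app = QApplication([])                    # a Qt application must exist
timer = VideoTimer(frequent=25)           # ~25 ticks per second
timer.timeSignal.signal.connect(on_tick)  # pyqtSignal(str) -> slot
timer.start()                             # QThread.start() invokes run()
# app.exec_() must be running for the queued signal to reach the slot;
# call timer.stop() before shutting down.
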
/src/built_in_camera_track.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import logging
4 | import torch
5 | import time
6 | import numpy as np
7 | from lib.tracking_utils.timer import Timer
8 | from lib.datasets.dataset.jde import letterbox
9 | from lib.tracking_utils.utils import mkdir_if_missing
10 | from lib.tracking_utils import visualization as vis
11 |
12 |
13 | def built_in_camera_tracker(threshold_value, root_dir, use_gpu,
14 | opts, JDETracker, fourcc, QPixmap, QImage, videolabel,
15 | QMessageBox, QApplication, logger):
16 | try:
17 | cap_test = cv2.VideoCapture(0)
18 | if cap_test is None or not cap_test.isOpened():
19 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', 'The built-in camera is not available')
20 | msg_box.exec_()
21 | else:
22 | cap_test.release()
23 |
24 | # if flag == 0:
25 | # msg_box = QMessageBox(QMessageBox.Warning, 'Note!', 'Please make sure this device has a camera')
26 | # msg_box.exec_()
27 |
28 | # Set params
29 | logger.setLevel(logging.INFO)
30 | print(f'camera_id: {0}')
31 |
32 | model_dir = root_dir + '/models'
33 |
34 | for pth in os.listdir(model_dir):
35 | if pth.split('.')[-1] == 'pth':
36 | model_dir += ('/' + pth)
37 | break
38 |
39 | print(f'model_dir: {model_dir}')
40 |
41 | output_video_dir = root_dir + '/output_built_in_camera'
42 |
43 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
44 | opt = opts(current_dir=root_dir,model_path=model_dir,
45 | input_path=None,threshold=threshold_value,
46 | match_threshold=0.8,use_gpu=use_gpu).init()
47 |
48 | opt.output_root = output_video_dir
49 | print(f'current_use_gpus: {opt.gpus}')
50 | print(f'output_video_dir: {output_video_dir}')
51 | mkdir_if_missing(output_video_dir)
52 |
53 | # Start tracking
54 | capture = cv2.VideoCapture(0)
55 | # frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
56 | # print(f'frame_count: {frame_count}', frame_count)
57 | #
58 | # progressBar.setMaximum(frame_count)
59 | frame_rate = 30
60 | tracker = JDETracker(opt, frame_rate=frame_rate)
61 | video_name = time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4'
62 | print(f'video_name: {video_name}')
63 |
64 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
65 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
66 | # fourcc codec is supplied by the caller (e.g. from cv2.VideoWriter_fourcc)
67 | writer = cv2.VideoWriter((output_video_dir + '/' + video_name), fourcc, frame_rate, (width, height))
68 | results = []
69 | frame_id = 0
70 | timer = Timer()
71 | use_cuda = True
72 | if use_gpu == '-1':
73 | use_cuda = False
74 |
75 | while (True):
76 | try:
77 | # run tracking
78 | ok, frame = capture.read()
79 | if not ok:
80 | break
81 | # frame = cv2.resize(frame, (1920, 1080))
82 | img, _, _, _ = letterbox(frame, height=1088, width=608)
83 | img = img[:, :, ::-1].transpose(2, 0, 1)
84 | img = np.ascontiguousarray(img, dtype=np.float32)
85 | img /= 255.0
86 | timer.tic()
87 |
88 | if use_cuda:
89 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
90 | else:
91 | blob = torch.from_numpy(img).unsqueeze(0)
92 |
93 | online_targets = tracker.update(blob, frame)
94 | online_tlwhs = []
95 | online_ids = []
96 | online_scores = []
97 | for t in online_targets:
98 | tlwh = t.tlwh
99 | tid = t.track_id
100 | vertical = tlwh[2] / tlwh[3] > 1.6
101 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
102 | online_tlwhs.append(tlwh)
103 | online_ids.append(tid)
104 | online_scores.append(t.score)
105 | timer.toc()
106 | # save results
107 | # results.append((frame_id + 1, online_tlwhs, online_ids))
108 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
109 | fps = 1. / timer.average_time
110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id,
111 | fps=fps)
112 | frame_id += 1
113 | print(f'detect frame: {frame_id}')
114 |
115 | height, width = online_im.shape[:2]
116 | if online_im.ndim == 3:
117 | rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB)
118 | elif online_im.ndim == 2:
119 | rgb = cv2.cvtColor(online_im, cv2.COLOR_GRAY2RGB)
120 |
121 | temp_image = QImage(rgb.flatten(), width, height, QImage.Format_RGB888)
122 | temp_pixmap = QPixmap.fromImage(temp_image)
123 | writer.write(online_im)
124 | videolabel.setPixmap(temp_pixmap)
125 | QApplication.processEvents()
126 | except Exception:
127 | writer.release()
128 | writer.release()
129 | except Exception:
130 | pass  # errors are swallowed so the GUI keeps running
--------------------------------------------------------------------------------
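
Every tracking loop in this project repeats the same frame preprocessing before calling tracker.update: letterbox-resize to 1088x608, convert BGR to RGB, move channels first, scale to [0, 1] and add a batch dimension. A small helper capturing those steps (a sketch only; the scripts above inline this logic rather than calling such a function):

import numpy as np
import torch
from lib.datasets.dataset.jde import letterbox

def frame_to_blob(frame, use_cuda=False):
    # Letterbox keeps the aspect ratio and pads to the network input size.
    img, _, _, _ = letterbox(frame, height=1088, width=608)
    img = img[:, :, ::-1].transpose(2, 0, 1)                # BGR HWC -> RGB CHW
    img = np.ascontiguousarray(img, dtype=np.float32) / 255.0
    blob = torch.from_numpy(img).unsqueeze(0)               # 1 x 3 x H x W
    return blob.cuda() if use_cuda else blob
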
/src/external_camera_track.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import logging
4 | import torch
5 | import time
6 | import numpy as np
7 | from lib.tracking_utils.timer import Timer
8 | from lib.datasets.dataset.jde import letterbox
9 | from lib.tracking_utils.utils import mkdir_if_missing
10 | from lib.tracking_utils import visualization as vis
11 |
12 |
13 | def external_camera_tracker(threshold_value, root_dir, use_gpu,
14 | opts, JDETracker, fourcc, QPixmap, QImage, videolabel,
15 | QMessageBox, QApplication, logger):
16 | try:
17 | cap_test = cv2.VideoCapture(1)
18 | if cap_test is None or not cap_test.isOpened():
19 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', 'The external camera is not available')
20 | msg_box.exec_()
21 | else:
22 | cap_test.release()
23 |
24 | # if flag == 0:
25 | # msg_box = QMessageBox(QMessageBox.Warning, 'Note!', 'Please make sure this device has a camera')
26 | # msg_box.exec_()
27 |
28 | # Set parameters.
29 | logger.setLevel(logging.INFO)
30 | print(f'camera_id: {1}')
31 |
32 | model_dir = root_dir + '/models'
33 |
34 | for pth in os.listdir(model_dir):
35 | if pth.split('.')[-1] == 'pth':
36 | model_dir += ('/' + pth)
37 | break
38 |
39 | print(f'model_dir: {model_dir}')
40 |
41 | output_video_dir = root_dir + '/output_external_camera'
42 |
43 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
44 | opt = opts(current_dir=root_dir, model_path=model_dir,
45 | input_path=None, threshold=threshold_value,
46 | match_threshold=0.8, use_gpu=use_gpu).init()
47 |
48 | opt.output_root = output_video_dir
49 | print(f'current_use_gpus: {opt.gpus}')
50 | print(f'output_video_dir: {output_video_dir}')
51 | mkdir_if_missing(output_video_dir)
52 |
53 | # Start tracking
54 | capture = cv2.VideoCapture(1)
55 | # frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
56 | # print(f'frame_count: {frame_count}', frame_count)
57 | #
58 | # progressBar.setMaximum(frame_count)
59 | frame_rate = 30
60 | tracker = JDETracker(opt, frame_rate=frame_rate)
61 | video_name = time.strftime('%Y_%m_%d_%H_%M', time.localtime()) + '_.mp4'
62 | print(f'video_name: {video_name}')
63 |
64 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
65 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
66 | # fourcc codec is supplied by the caller (e.g. from cv2.VideoWriter_fourcc)
67 | writer = cv2.VideoWriter((output_video_dir + '/' + video_name), fourcc, frame_rate, (width, height))
68 | results = []
69 | frame_id = 0
70 | timer = Timer()
71 | use_cuda = True
72 | if use_gpu == '-1':
73 | use_cuda = False
74 |
75 | while (True):
76 | try:
77 | # run tracking
78 | ok, frame = capture.read()
79 | if not ok:
80 | break
81 | # frame = cv2.resize(frame, (1920, 1080))
82 | img, _, _, _ = letterbox(frame, height=1088, width=608)
83 | img = img[:, :, ::-1].transpose(2, 0, 1)
84 | img = np.ascontiguousarray(img, dtype=np.float32)
85 | img /= 255.0
86 | timer.tic()
87 |
88 | if use_cuda:
89 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
90 | else:
91 | blob = torch.from_numpy(img).unsqueeze(0)
92 |
93 | online_targets = tracker.update(blob, frame)
94 | online_tlwhs = []
95 | online_ids = []
96 | online_scores = []
97 | for t in online_targets:
98 | tlwh = t.tlwh
99 | tid = t.track_id
100 | vertical = tlwh[2] / tlwh[3] > 1.6
101 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
102 | online_tlwhs.append(tlwh)
103 | online_ids.append(tid)
104 | online_scores.append(t.score)
105 | timer.toc()
106 | # save results
107 | # results.append((frame_id + 1, online_tlwhs, online_ids))
108 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
109 | fps = 1. / timer.average_time
110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id,
111 | fps=fps)
112 | frame_id += 1
113 | print(f'detect frame: {frame_id}')
114 |
115 | height, width = online_im.shape[:2]
116 | if online_im.ndim == 3:
117 | rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB)
118 | elif online_im.ndim == 2:
119 | rgb = cv2.cvtColor(online_im, cv2.COLOR_GRAY2RGB)
120 |
121 | temp_image = QImage(rgb.flatten(), width, height, QImage.Format_RGB888)
122 | temp_pixmap = QPixmap.fromImage(temp_image)
123 | writer.write(online_im)
124 | videolabel.setPixmap(temp_pixmap)
125 | QApplication.processEvents()
126 | except Exception:
127 | writer.release()
128 | writer.release()
129 | except Exception:
130 | pass  # errors are swallowed so the GUI keeps running
--------------------------------------------------------------------------------
/src/tracker/__pycache__/_init_paths.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/src/tracker/__pycache__/_init_paths.cpython-38.pyc
--------------------------------------------------------------------------------
/src/tracker/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 |
--------------------------------------------------------------------------------
/src/tracker/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 |
7 | import logging
8 | import os
9 | import os.path as osp
10 | from lib.opts import opts
11 | from lib.tracking_utils.utils import mkdir_if_missing
12 | from lib.tracking_utils.log import logger
13 | import lib.datasets.dataset.jde as datasets
14 | from track import eval_seq
15 |
16 |
17 | logger.setLevel(logging.INFO)
18 |
19 |
20 | def demo(opt):
21 | result_root = opt.output_root if opt.output_root != '' else '.'
22 | mkdir_if_missing(result_root)
23 |
24 | logger.info('Starting tracking...')
25 | dataloader = datasets.LoadVideo(opt.input_video, opt.img_size)
26 | result_filename = os.path.join(result_root, 'results.txt')
27 | frame_rate = dataloader.frame_rate
28 |
29 | frame_dir = None if opt.output_format == 'text' else osp.join(result_root, 'frame')
30 | eval_seq(opt, dataloader, 'mot', result_filename,
31 | save_dir=frame_dir, show_image=False, frame_rate=frame_rate,
32 | use_cuda=opt.gpus!=[-1])
33 |
34 | if opt.output_format == 'video':
35 | output_video_path = osp.join(result_root, 'MOT16-03-results.mp4')
36 | cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -b 5000k -c:v mpeg4 {}'.format(osp.join(result_root, 'frame'), output_video_path)
37 | os.system(cmd_str)
38 |
39 |
40 | if __name__ == '__main__':
41 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
42 | opt = opts().init()
43 | demo(opt)
44 |
--------------------------------------------------------------------------------
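
For reference, with opt.output_root set to, say, 'output' (POSIX-style paths shown), the command assembled above expands to `ffmpeg -f image2 -i output/frame/%05d.jpg -b 5000k -c:v mpeg4 output/MOT16-03-results.mp4`, i.e. the per-frame JPEGs saved by eval_seq are re-encoded into a single MPEG-4 file.
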
/src/tracker/detect.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 | import os
7 | import os.path as osp
8 | import cv2
9 | import logging
10 | import argparse
11 | import motmetrics as mm
12 | import numpy as np
13 |
14 | from lib.tracker.fusetracker import FuseTracker
15 | from lib.tracking_utils import visualization as vis
16 | from lib.tracking_utils.log import logger
17 | from lib.tracking_utils.timer import Timer
18 | from lib.tracking_utils.evaluation import Evaluator
19 | import lib.datasets.dataset.jde as datasets
20 | import torch
21 | from lib.tracking_utils.utils import mkdir_if_missing, tlbr2tlwh
22 | from lib.opts import opts
23 | from lib.models.decode import mot_decode
24 | from lib.utils.post_process import ctdet_post_process
25 | from lib.models.model import create_model, load_model
26 |
27 |
28 | def write_results_score(filename, results):
29 | save_format = '{frame},{x1},{y1},{w},{h},{s}\n'
30 | with open(filename, 'w') as f:
31 | for frame_id, tlwhs, scores in results:
32 | for tlwh, score in zip(tlwhs, scores):
33 | x1, y1, w, h = tlwh
34 | line = save_format.format(frame=frame_id, x1=x1, y1=y1, w=w, h=h, s=score)
35 | f.write(line)
36 | print('save results to {}'.format(filename))
37 |
38 |
39 | def post_process(opt, dets, meta):
40 | dets = dets.detach().cpu().numpy()
41 | dets = dets.reshape(1, -1, dets.shape[2])
42 | dets = ctdet_post_process(
43 | dets.copy(), [meta['c']], [meta['s']],
44 | meta['out_height'], meta['out_width'], opt.num_classes)
45 | for j in range(1, opt.num_classes + 1):
46 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
47 | return dets[0]
48 |
49 |
50 | def merge_outputs(opt, detections):
51 | results = {}
52 | for j in range(1, opt.num_classes + 1):
53 | results[j] = np.concatenate(
54 | [detection[j] for detection in detections], axis=0).astype(np.float32)
55 |
56 | scores = np.hstack(
57 | [results[j][:, 4] for j in range(1, opt.num_classes + 1)])
58 | if len(scores) > 128:
59 | kth = len(scores) - 128
60 | thresh = np.partition(scores, kth)[kth]
61 | for j in range(1, opt.num_classes + 1):
62 | keep_inds = (results[j][:, 4] >= thresh)
63 | results[j] = results[j][keep_inds]
64 | return results
65 |
66 |
67 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30):
68 | if save_dir:
69 | mkdir_if_missing(save_dir)
70 | if opt.gpus[0] >= 0:
71 | opt.device = torch.device('cuda')
72 | else:
73 | opt.device = torch.device('cpu')
74 | print('Creating model...')
75 | model = create_model(opt.arch, opt.heads, opt.head_conv)
76 | model = load_model(model, opt.load_model)
77 | # model = torch.nn.DataParallel(model)
78 | model = model.to(opt.device)
79 | model.eval()
80 | timer = Timer()
81 | results = []
82 | frame_id = 0
83 | for path, img, img0 in dataloader:
84 | if frame_id % 20 == 0:
85 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time)))
86 | # run detecting
87 | timer.tic()
88 | blob = torch.from_numpy(img).to(opt.device).unsqueeze(0)  # respect the CPU/GPU choice made above
89 | width = img0.shape[1]
90 | height = img0.shape[0]
91 | inp_height = blob.shape[2]
92 | inp_width = blob.shape[3]
93 | c = np.array([width / 2., height / 2.], dtype=np.float32)
94 | s = max(float(inp_width) / float(inp_height) * height, width) * 1.0
95 | meta = {'c': c, 's': s,
96 | 'out_height': inp_height // opt.down_ratio,
97 | 'out_width': inp_width // opt.down_ratio}
98 | with torch.no_grad():
99 | output = model(blob)[-1]
100 | hm = output['hm'].sigmoid_()
101 | wh = output['wh']
102 | reg = output['reg'] if opt.reg_offset else None
103 | dets, inds = mot_decode(hm, wh, reg=reg, ltrb=opt.ltrb, K=opt.K)
104 |
105 | dets = post_process(opt, dets, meta)
106 | dets = merge_outputs(opt, [dets])[1]
107 |
108 | dets = dets[dets[:, 4] > 0.1]
109 | dets[:, :4] = tlbr2tlwh(dets[:, :4])
110 |
111 | tlwhs = []
112 | scores = []
113 | for *tlwh, conf in dets:
114 | tlwhs.append(tlwh)
115 | scores.append(conf)
116 | timer.toc()
117 | # save results
118 | results.append((frame_id + 1, tlwhs, scores))
119 | frame_id += 1
120 | # save results
121 | write_results_score(result_filename, results)
122 | #write_results_score_hie(result_filename, results, data_type)
123 | return frame_id, timer.average_time, timer.calls
124 |
125 |
126 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo',
127 | save_images=False, save_videos=False, show_image=True):
128 | logger.setLevel(logging.INFO)
129 | result_root = os.path.join(data_root, '..', 'dets', exp_name)
130 | mkdir_if_missing(result_root)
131 | data_type = 'mot'
132 |
133 | # run tracking
134 | accs = []
135 | n_frame = 0
136 | timer_avgs, timer_calls = [], []
137 | for seq in seqs:
138 | output_dir = os.path.join(data_root, '..', 'outputs', exp_name, seq) if save_images or save_videos else None
139 | logger.info('start seq: {}'.format(seq))
140 | dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size)
141 | result_filename = os.path.join(result_root, '{}.txt'.format(seq))
142 |
143 | nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename,
144 | save_dir=output_dir, show_image=show_image)
145 | n_frame += nf
146 | timer_avgs.append(ta)
147 | timer_calls.append(tc)
148 |
149 | timer_avgs = np.asarray(timer_avgs)
150 | timer_calls = np.asarray(timer_calls)
151 | all_time = np.dot(timer_avgs, timer_calls)
152 | avg_time = all_time / np.sum(timer_calls)
153 | logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time))
154 |
155 |
156 | if __name__ == '__main__':
157 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
158 | opt = opts().init()
159 | if opt.val_hie:
160 | seqs_str = '''1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19'''
161 | #seqs_str = '''9'''
162 | #seqs_str = '''11 12 13 14 15 16 17 18 19'''
163 | data_root = '/data/yfzhang/MOT/JDE/HIE/HIE20/images/train'
164 | elif opt.test_hie:
165 | seqs_str = '''20 21 22 23 24 25 26 27 28 29 30 31 32'''
166 | seqs_str = '''25'''
167 | data_root = '/data/yfzhang/MOT/JDE/HIE/HIE20/images/test'
168 | elif opt.val_mot17:
169 | seqs_str = '''MOT17-02-SDP
170 | MOT17-04-SDP
171 | MOT17-05-SDP
172 | MOT17-09-SDP
173 | MOT17-10-SDP
174 | MOT17-11-SDP
175 | MOT17-13-SDP'''
176 | #seqs_str = '''MOT17-02-SDP'''
177 | data_root = os.path.join(opt.data_dir, 'MOT17/images/train')
178 | else:
179 | # No dataset flag set: fail early rather than crash on seqs_str.split() below
180 | raise ValueError('No dataset selected; enable one of the val/test options in opts')
181 | seqs = [seq.strip() for seq in seqs_str.split()]
182 |
183 | main(opt,
184 | data_root=data_root,
185 | seqs=seqs,
186 | exp_name='fairmot_mot17',
187 | show_image=False,
188 | save_images=False,
189 | save_videos=False)
190 |
--------------------------------------------------------------------------------
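
write_results_score in this script stores raw detections only, one comma-separated line per box, without the track-id column that the writers in track.py add. For example, a box of width 50 and height 120 at (100, 200) with score 0.9 in frame 3 is written as:

3,100,200,50,120,0.9
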
/src/tracker/to_track.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import torch
7 | import numpy as np
8 | import os
9 | import logging
10 | import time
11 |
12 | from lib.opts import opts
13 | from lib.tracking_utils.utils import mkdir_if_missing
14 | from lib.tracking_utils.log import logger
15 | from lib.tracker.multitracker import JDETracker
16 | from lib.tracking_utils.timer import Timer
17 | from lib.tracking_utils import visualization as vis
18 | from lib.datasets.dataset.jde import letterbox
19 |
20 | logger.setLevel(logging.INFO)
21 |
22 | # Set parameters.
23 | # Input video, model path, thresholds and the tracking device are configured here.
24 | current_dir = os.path.dirname(os.path.realpath(__file__)).\
25 | replace('\\','/').replace('/src/tracker','')
26 | input_path = current_dir + '/videos/MOT16-03.mp4'
27 | input_file_name = (input_path.split('/')[-1]).split('.')[0]
28 | output_path = current_dir + '/output_video/'
29 | model_dir = current_dir + '/models'
30 | threshold = 0.4
31 | match_threshold = 0.8
32 |
33 | # Choose the device used for tracking (CPU: -1, GPU_1: 0, GPU_2: 1).
34 | # '-1' runs on the CPU; '0' or '1' selects the corresponding GPU.
35 | set_use_gpu = '-1'
36 | print(f'input_path: {input_path}')
37 |
38 | for pth in os.listdir(model_dir):
39 | if pth.split('.')[-1] == 'pth':
40 | model_dir += ('/' + pth)
41 | break
42 |
43 | print(f'model_path: {model_dir}')
44 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
45 | opt = opts(current_dir=current_dir, model_path=model_dir,
46 | input_path=input_path, threshold=threshold,
47 | match_threshold=match_threshold, use_gpu=set_use_gpu).init()
48 | opt.output_root = output_path
49 | print(f'current_use_gpus: {opt.gpus}')
50 | if opt.output_root:
51 | mkdir_if_missing(opt.output_root)
52 | # frame_dir = None if opt.output_format == 'text' else osp.join(result_root, 'frame')
53 |
54 | # Start tracking
55 | capture = cv2.VideoCapture(input_path)
56 | frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
57 | print(f'frame_count: {frame_count}')
58 | frame_rate = 30
59 | tracker = JDETracker(opt, frame_rate=frame_rate)
60 | video_name = input_file_name + '_' + time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4'
61 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
62 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
63 | fps = frame_rate
64 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
65 | writer = cv2.VideoWriter(opt.output_root + video_name, fourcc, fps, (width, height))
66 | results = []
67 | frame_id = 0
68 | timer = Timer()
69 | use_cuda = True
70 | if set_use_gpu == '-1':
71 | use_cuda = False
72 |
73 | while(True):
74 | # run tracking
75 | ok,frame = capture.read()
76 | if not ok:
77 | break
78 | frame = cv2.resize(frame, (1920, 1080))
79 | img, _, _, _ = letterbox(frame, height=1088, width=608)
80 | img = img[:, :, ::-1].transpose(2, 0, 1)
81 | img = np.ascontiguousarray(img, dtype=np.float32)
82 | img /= 255.0
83 | timer.tic()
84 | if use_cuda:
85 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
86 | else:
87 | blob = torch.from_numpy(img).unsqueeze(0)
88 | online_targets = tracker.update(blob, frame)
89 | online_tlwhs = []
90 | online_ids = []
91 | online_scores = []
92 | for t in online_targets:
93 | tlwh = t.tlwh
94 | tid = t.track_id
95 | vertical = tlwh[2] / tlwh[3] > 1.6
96 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
97 | online_tlwhs.append(tlwh)
98 | online_ids.append(tid)
99 | online_scores.append(t.score)
100 | timer.toc()
101 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
102 | fps = 1. / timer.average_time
103 | # save results
104 | #results.append((frame_id + 1, online_tlwhs, online_ids))
105 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
106 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id,
107 | fps=fps)
108 | frame_id += 1
109 | print(f'detect frame: {frame_id}')
110 | im = np.array(online_im)
111 | writer.write(im)
112 | writer.release()
113 | capture.release()
114 | # save results
115 | # write_results(result_filename, results, data_type)
116 | # write_results_score(result_filename, results, data_type)
117 |
--------------------------------------------------------------------------------
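
to_track.py accumulates results but leaves the file-writing calls commented out. If the per-frame boxes should also be persisted, a sketch that reuses the MOT-style line format of write_results_score from src/tracker/track.py could look like this (the output path in the commented call is only an example):

def save_results(filename, results):
    fmt = '{frame},{id},{x1},{y1},{w},{h},{s},1,-1,-1,-1\n'
    with open(filename, 'w') as f:
        for frame_id, tlwhs, track_ids, scores in results:
            for (x1, y1, w, h), tid, score in zip(tlwhs, track_ids, scores):
                f.write(fmt.format(frame=frame_id, id=tid, x1=x1, y1=y1,
                                   w=w, h=h, s=score))

# save_results(opt.output_root + input_file_name + '_results.txt', results)
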
/src/tracker/to_track_camera.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import torch
7 | import numpy as np
8 | import os
9 | import logging
10 | import time
11 |
12 | from lib.opts import opts
13 | from lib.tracking_utils.utils import mkdir_if_missing
14 | from lib.tracking_utils.log import logger
15 | from lib.tracker.multitracker import JDETracker
16 | from lib.tracking_utils.timer import Timer
17 | from lib.tracking_utils import visualization as vis
18 | from lib.datasets.dataset.jde import letterbox
19 |
20 | logger.setLevel(logging.INFO)
21 | # Set parameters.
22 | # Camera id, model path, thresholds and the tracking device are configured here.
23 | current_dir = os.path.dirname(os.path.realpath(__file__))\
24 | .replace('\\','/').replace('/src/tracker','')
25 | input_path = None
26 | input_file_name = None
27 | model_dir = current_dir + '/models'
28 | output_path = current_dir + '/output_camera'
29 | threshold = 0.4
30 | match_threshold = 0.8
31 | camera_id = -1
32 | if input_path == None:
33 | camera_id = 0
34 |
35 | # Choose the device used for tracking (CPU: -1, GPU_1: 0, GPU_2: 1).
36 | # '-1' runs on the CPU; '0' or '1' selects the corresponding GPU.
37 | set_use_gpu = '-1'
38 | print(f'camera_id: {camera_id}')
39 |
40 | for pth in os.listdir(model_dir):
41 | if pth.split('.')[-1] == 'pth':
42 | model_dir += ('/' + pth)
43 | break
44 |
45 | print(f'model_path: {model_dir}')
46 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
47 | opt = opts(current_dir=current_dir, model_path=model_dir,
48 | input_path=input_path, threshold=threshold,
49 | match_threshold=match_threshold, use_gpu=set_use_gpu).init()
50 | opt.output_root = output_path
51 | print(f'current_use_gpus: {opt.gpus}')
52 | print(f'output_path: {opt.output_root}')
53 | mkdir_if_missing(opt.output_root)
54 | # frame_dir = None if opt.output_format == 'text' else osp.join(result_root, 'frame')
55 |
56 | # Start tracking
57 | capture = cv2.VideoCapture(camera_id)
58 | frame_rate = 30
59 | tracker = JDETracker(opt, frame_rate=frame_rate)
60 |
61 | # Use the current time as the file name of the saved camera recording
62 | video_name = time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4'
63 | print(f'video_name: {video_name}')
64 |
65 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
66 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
67 | fps = frame_rate
68 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
69 | writer = cv2.VideoWriter((opt.output_root + '/' + video_name), fourcc, fps, (width, height))
70 | results = []
71 | frame_id = 0
72 | timer = Timer()
73 | use_cuda = True
74 | if set_use_gpu == '-1':
75 | use_cuda = False
76 |
77 | while(True):
78 | # run tracking
79 | ok,frame = capture.read()
80 | if not ok:
81 | break
82 | #frame = cv2.resize(frame, (1920, 1080))
83 | img, _, _, _ = letterbox(frame, height=1088, width=608)
84 | img = img[:, :, ::-1].transpose(2, 0, 1)
85 | img = np.ascontiguousarray(img, dtype=np.float32)
86 | img /= 255.0
87 | timer.tic()
88 | if use_cuda:
89 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
90 | else:
91 | blob = torch.from_numpy(img).unsqueeze(0)
92 | online_targets = tracker.update(blob, frame)
93 | online_tlwhs = []
94 | online_ids = []
95 | online_scores = []
96 | for t in online_targets:
97 | tlwh = t.tlwh
98 | tid = t.track_id
99 | vertical = tlwh[2] / tlwh[3] > 1.6
100 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
101 | online_tlwhs.append(tlwh)
102 | online_ids.append(tid)
103 | online_scores.append(t.score)
104 | timer.toc()
105 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
106 | fps = 1. / timer.average_time
107 | # save results
108 | #results.append((frame_id + 1, online_tlwhs, online_ids))
109 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids,
111 | frame_id=frame_id, fps=fps)
112 | frame_id += 1
113 | print(f'detect frame: {frame_id}')
114 | im = np.array(online_im)
115 | writer.write(online_im)
116 | cv2.imshow('test',online_im)
117 | if cv2.waitKey(1) & 0xFF == ord('q'):
118 | break
119 | capture.release()
120 | writer.release()
121 | cv2.destroyAllWindows()
122 |
--------------------------------------------------------------------------------
/src/tracker/track.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import _init_paths
6 | import os
7 | import os.path as osp
8 | import cv2
9 | import logging
10 | import argparse
11 | import motmetrics as mm
12 | import numpy as np
13 | import torch
14 |
15 | from lib.tracker.multitracker import JDETracker
16 | from lib.tracking_utils import visualization as vis
17 | from lib.tracking_utils.log import logger
18 | from lib.tracking_utils.timer import Timer
19 | from lib.tracking_utils.evaluation import Evaluator
20 | import lib.datasets.dataset.jde as datasets
21 |
22 | from lib.tracking_utils.utils import mkdir_if_missing
23 | from lib.opts import opts
24 |
25 |
26 | def write_results(filename, results, data_type):
27 | if data_type == 'mot':
28 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
29 | elif data_type == 'kitti':
30 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
31 | else:
32 | raise ValueError(data_type)
33 |
34 | with open(filename, 'w') as f:
35 | for frame_id, tlwhs, track_ids in results:
36 | if data_type == 'kitti':
37 | frame_id -= 1
38 | for tlwh, track_id in zip(tlwhs, track_ids):
39 | if track_id < 0:
40 | continue
41 | x1, y1, w, h = tlwh
42 | x2, y2 = x1 + w, y1 + h
43 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
44 | f.write(line)
45 | logger.info('save results to {}'.format(filename))
46 |
47 |
48 | def write_results_score(filename, results, data_type):
49 | if data_type == 'mot':
50 | save_format = '{frame},{id},{x1},{y1},{w},{h},{s},1,-1,-1,-1\n'
51 | elif data_type == 'kitti':
52 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
53 | else:
54 | raise ValueError(data_type)
55 |
56 | with open(filename, 'w') as f:
57 | for frame_id, tlwhs, track_ids, scores in results:
58 | if data_type == 'kitti':
59 | frame_id -= 1
60 | for tlwh, track_id, score in zip(tlwhs, track_ids, scores):
61 | if track_id < 0:
62 | continue
63 | x1, y1, w, h = tlwh
64 | x2, y2 = x1 + w, y1 + h
65 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, s=score)
66 | f.write(line)
67 | logger.info('save results to {}'.format(filename))
68 |
69 |
70 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30, use_cuda=True):
71 | if save_dir:
72 | mkdir_if_missing(save_dir)
73 | tracker = JDETracker(opt, frame_rate=frame_rate)
74 | timer = Timer()
75 | results = []
76 | frame_id = 0
77 | #for path, img, img0 in dataloader:
78 | for i, (path, img, img0) in enumerate(dataloader):
79 | #if i % 8 != 0:
80 | #continue
81 | if frame_id % 20 == 0:
82 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time)))
83 |
84 | # run tracking
85 | timer.tic()
86 | if use_cuda:
87 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
88 | else:
89 | blob = torch.from_numpy(img).unsqueeze(0)
90 | online_targets = tracker.update(blob, img0)
91 | online_tlwhs = []
92 | online_ids = []
93 | online_scores = []
94 | for t in online_targets:
95 | tlwh = t.tlwh
96 | tid = t.track_id
97 | vertical = tlwh[2] / tlwh[3] > 1.6
98 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
99 | online_tlwhs.append(tlwh)
100 | online_ids.append(tid)
101 | online_scores.append(t.score)
102 | timer.toc()
103 | # save results
104 | results.append((frame_id + 1, online_tlwhs, online_ids))
105 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
106 | if show_image or save_dir is not None:
107 | online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id,
108 | fps=1. / timer.average_time)
109 | if show_image:
110 | cv2.imshow('online_im', online_im)
111 | if save_dir is not None:
112 | cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im)
113 | frame_id += 1
114 | # save results
115 | #write_results(result_filename, results, data_type)
116 | #write_results_score(result_filename, results, data_type)
117 | return frame_id, timer.average_time, timer.calls
118 |
119 |
120 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo',
121 | save_images=False, save_videos=False, show_image=True):
122 | logger.setLevel(logging.INFO)
123 | result_root = os.path.join(data_root, '..', 'results', exp_name)
124 | mkdir_if_missing(result_root)
125 | data_type = 'mot'
126 |
127 | # run tracking
128 | accs = []
129 | n_frame = 0
130 | timer_avgs, timer_calls = [], []
131 | for seq in seqs:
132 | output_dir = os.path.join(data_root, '..', 'outputs', exp_name, seq) if save_images or save_videos else None
133 | logger.info('start seq: {}'.format(seq))
134 | dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size)
135 | result_filename = os.path.join(result_root, '{}.txt'.format(seq))
136 | meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read()
137 | frame_rate = int(meta_info[meta_info.find('frameRate') + 10:meta_info.find('\nseqLength')])
138 | nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename,
139 | save_dir=output_dir, show_image=show_image, frame_rate=frame_rate)
140 | n_frame += nf
141 | timer_avgs.append(ta)
142 | timer_calls.append(tc)
143 |
144 | # eval
145 | logger.info('Evaluate seq: {}'.format(seq))
146 | evaluator = Evaluator(data_root, seq, data_type)
147 | accs.append(evaluator.eval_file(result_filename))
148 | if save_videos:
149 | output_video_path = osp.join(output_dir, '{}.mp4'.format(seq))
150 | cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -c:v copy {}'.format(output_dir, output_video_path)
151 | os.system(cmd_str)
152 | timer_avgs = np.asarray(timer_avgs)
153 | timer_calls = np.asarray(timer_calls)
154 | all_time = np.dot(timer_avgs, timer_calls)
155 | avg_time = all_time / np.sum(timer_calls)
156 | logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time))
157 |
158 | # get summary
159 | metrics = mm.metrics.motchallenge_metrics
160 | mh = mm.metrics.create()
161 | summary = Evaluator.get_summary(accs, seqs, metrics)
162 | strsummary = mm.io.render_summary(
163 | summary,
164 | formatters=mh.formatters,
165 | namemap=mm.io.motchallenge_metric_names
166 | )
167 | print(strsummary)
168 | Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name)))
169 |
--------------------------------------------------------------------------------
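
The frame rate in main() is read from seqinfo.ini by slicing the raw text, which works for the standard MOT Challenge layout but breaks if the key order changes. A more defensive variant (a sketch, assuming the usual [Sequence] section of seqinfo.ini):

import configparser
import os

def read_frame_rate(data_root, seq):
    # seqinfo.ini is an INI file; frameRate lives in its [Sequence] section.
    ini = configparser.ConfigParser()
    ini.read(os.path.join(data_root, seq, 'seqinfo.ini'))
    return int(ini['Sequence']['frameRate'])
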
/src/video_track.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import logging
4 | import torch
5 | import time
6 | import numpy as np
7 | from lib.tracking_utils.timer import Timer
8 | from lib.datasets.dataset.jde import letterbox
9 | from lib.tracking_utils.utils import mkdir_if_missing
10 | from lib.tracking_utils import visualization as vis
11 |
12 | def video_tracker(threshold_value, root_dir, input_video_dir, use_gpus,
13 | opts, JDETracker, fourcc,
14 | QMessageBox, progressBar, QApplication, logger):
15 |
16 | try:
17 | threshold = threshold_value
18 |
19 | if os.path.exists(input_video_dir):
20 | file_name = (input_video_dir.split('.')[0]).split('/')[-1] + '_' + time.strftime('%Y_%m_%d_%H_%M',time.localtime())
21 | output_video_dir = root_dir + '/output_video'
22 | print(f'output video dir:{output_video_dir}')
23 | else:
24 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', "The input video file does not exist")
25 | msg_box.exec_()
26 | return 0  # nothing to track without an input video
27 | progressBar.setValue(0)
28 | logger.setLevel(logging.INFO)
29 | model_dir = root_dir + '/models'
30 |
31 | for pth in os.listdir(model_dir):
32 | if pth.split('.')[-1] == 'pth':
33 | model_dir += ('/' + pth)
34 | break
35 | print(f'model_dir: {model_dir}')
36 |
37 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
38 | opt = opts(current_dir=root_dir,model_path=model_dir,
39 | input_path=input_video_dir,threshold=threshold,
40 | match_threshold=0.8,use_gpu=use_gpus).init()
41 |
42 | opt.output_root = output_video_dir
43 | print(f'current_use_gpus: {opt.gpus}')
44 |
45 | if opt.output_root:
46 | mkdir_if_missing(opt.output_root)
47 |
48 | # Start tracking
49 | capture = cv2.VideoCapture(input_video_dir)
50 | frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
51 | print(f'frame_count: {frame_count}')
52 |
53 | progressBar.setMaximum(frame_count)
54 |
55 | # start to run track
56 | frame_rate = 30
57 | tracker = JDETracker(opt, frame_rate=frame_rate)
58 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
59 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
60 | # fourcc codec is supplied by the caller (e.g. from cv2.VideoWriter_fourcc)
61 | writer = cv2.VideoWriter(output_video_dir + '/' + file_name + '.mp4',
62 | fourcc, frame_rate, (width, height))
63 | results = []
64 | frame_id = 0
65 | timer = Timer()
66 | use_cuda = True
67 |
68 | if '-1' == use_gpus:
69 | use_cuda = False
70 |
71 | step = 0
72 | while (True):
73 | # run tracking
74 | step += 1
75 | ok, frame = capture.read()
76 | if not ok:
77 | break
78 | frame = cv2.resize(frame, (1920, 1080))
79 | img, _, _, _ = letterbox(frame, height=1088, width=608)
80 | img = img[:, :, ::-1].transpose(2, 0, 1)
81 | img = np.ascontiguousarray(img, dtype=np.float32)
82 | img /= 255.0
83 | timer.tic()
84 |
85 | if use_cuda:
86 | blob = torch.from_numpy(img).cuda().unsqueeze(0)
87 | else:
88 | blob = torch.from_numpy(img).unsqueeze(0)
89 |
90 | online_targets = tracker.update(blob, frame)
91 | online_tlwhs = []
92 | online_ids = []
93 | online_scores = []
94 | for t in online_targets:
95 | tlwh = t.tlwh
96 | tid = t.track_id
97 | vertical = tlwh[2] / tlwh[3] > 1.6
98 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical:
99 | online_tlwhs.append(tlwh)
100 | online_ids.append(tid)
101 | online_scores.append(t.score)
102 | timer.toc()
103 | # save results
104 | # results.append((frame_id + 1, online_tlwhs, online_ids))
105 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores))
106 | fps = 1. / timer.average_time
107 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids,
108 | frame_id=frame_id,fps=fps)
109 | frame_id += 1
110 | print(f'detect frame: {frame_id}')
111 |
112 | writer.write(online_im)
113 | progressBar.setValue(step)
114 | QApplication.processEvents()
115 |
116 | msg_box = QMessageBox(QMessageBox.Warning, 'Note!', "Video prediction finished")
117 | msg_box.exec_()
118 | writer.release()
119 | capture.release()
120 |
121 | return 1
122 | except Exception:
123 | return 0
--------------------------------------------------------------------------------