├── DCNv2-pytorch_1.9.zip ├── LICENSE ├── README.md ├── README_en.md ├── cython_bbox-0.1.3.tar.gz ├── docs ├── FairMOT.png ├── Interface.png ├── MOT20-01.gif ├── Making_Introduction_cn.md └── Making_Introduction_en.md ├── interface.py ├── lib ├── DCNv2 │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── _ext.cp38-win_amd64.pyd │ ├── dcn_v2.py │ ├── make.sh │ ├── setup.py │ ├── src │ │ ├── cpu │ │ │ ├── dcn_v2_cpu.cpp │ │ │ ├── dcn_v2_im2col_cpu.cpp │ │ │ ├── dcn_v2_im2col_cpu.h │ │ │ ├── dcn_v2_psroi_pooling_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── dcn_v2_cuda.cu │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ └── vision.h │ │ ├── dcn_v2.h │ │ └── vision.cpp │ └── test │ │ ├── test.py │ │ ├── testcpu.py │ │ └── testcuda.py ├── cfg │ ├── crowdhuman.json │ ├── data.json │ ├── data_all.json │ ├── data_half.json │ ├── mot15.json │ ├── mot16.json │ ├── mot17.json │ ├── mot17_half.json │ └── mot20.json ├── datasets │ ├── dataset │ │ ├── __pycache__ │ │ │ ├── jde.cpython-37.pyc │ │ │ └── jde.cpython-38.pyc │ │ ├── jde.py │ │ └── jde_yolov5.py │ └── dataset_factory.py ├── logger.py ├── models │ ├── __pycache__ │ │ ├── common.cpython-38.pyc │ │ ├── decode.cpython-38.pyc │ │ ├── model.cpython-38.pyc │ │ ├── utils.cpython-38.pyc │ │ └── yolo.cpython-38.pyc │ ├── common.py │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── __pycache__ │ │ │ ├── dlav0.cpython-38.pyc │ │ │ ├── pose_dla_conv.cpython-38.pyc │ │ │ ├── pose_dla_dcn.cpython-38.pyc │ │ │ ├── pose_hrnet.cpython-38.pyc │ │ │ ├── resnet_dcn.cpython-38.pyc │ │ │ └── resnet_fpn_dcn.cpython-38.pyc │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── default.cpython-38.pyc │ │ │ ├── default.py │ │ │ ├── hrnet_w18.yaml │ │ │ ├── hrnet_w32.yaml │ │ │ └── yolov5s.yaml │ │ ├── dlav0.py │ │ ├── pose_dla_conv.py │ │ ├── pose_dla_dcn.py │ │ ├── pose_hrnet.py │ │ ├── resnet_dcn.py │ │ └── resnet_fpn_dcn.py │ ├── scatter_gather.py │ ├── utils.py │ └── yolo.py ├── opts.py ├── tracker │ ├── __pycache__ │ │ ├── basetrack.cpython-38.pyc │ │ ├── matching.cpython-38.pyc │ │ └── multitracker.cpython-38.pyc │ ├── basetrack.py │ ├── matching.py │ └── multitracker.py ├── tracking_utils │ ├── __pycache__ │ │ ├── evaluation.cpython-38.pyc │ │ ├── io.cpython-38.pyc │ │ ├── kalman_filter.cpython-38.pyc │ │ ├── log.cpython-38.pyc │ │ ├── timer.cpython-38.pyc │ │ ├── utils.cpython-38.pyc │ │ └── visualization.cpython-38.pyc │ ├── evaluation.py │ ├── io.py │ ├── kalman_filter.py │ ├── log.py │ ├── nms.py │ ├── parse_config.py │ ├── timer.py │ ├── utils.py │ └── visualization.py ├── trains │ ├── base_trainer.py │ ├── mot.py │ └── train_factory.py └── utils │ ├── __pycache__ │ ├── image.cpython-38.pyc │ ├── post_process.cpython-38.pyc │ └── utils.cpython-38.pyc │ ├── image.py │ ├── post_process.py │ └── utils.py ├── main.py ├── requirements.txt └── src ├── VideoTimer.py ├── built_in_camera_track.py ├── external_camera_track.py ├── tracker ├── __pycache__ │ └── _init_paths.cpython-38.pyc ├── _init_paths.py ├── demo.py ├── detect.py ├── to_track.py ├── to_track_camera.py ├── track.py └── track_half.py └── video_track.py /DCNv2-pytorch_1.9.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/DCNv2-pytorch_1.9.zip 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 ReverseSacle(CLX) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | Please follow the original author's License 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CrowdTracker-Pytorch(单摄像头) 2 | 3 | [简体中文](./README.md) | [English](./README_en.md) 4 | 5 | ## 地址导航 6 | 7 | + [→Paddle版地址](https://github.com/ReverseSacle/FairMOT-Paddle-Tracker_Basic) 8 | + [→FairMot作者(Github)](https://github.com/ifzhang/FairMOT) 9 | 10 | ## 效果预览 11 | 12 | ![MOT20-01](./docs/MOT20-01.gif) 13 | 14 | ## 界面预览 15 | 16 | ![Interface](./docs/Interface.png) 17 | 18 | ## 相关介绍 19 | 20 | + [→制作介绍](./docs/Making_Introduction_cn.md) 21 | 22 | ## 环境要求 23 | 24 | + python3 25 | + opencv-python 26 | + DCNv2 27 | + 已运行的测试平台 → window10 28 | + 已经配置好的conda环境(所需要的全部环境的整合) [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIiSrFcXk8xcSq_zLD?e=e51wjQ?download=1) 29 | 30 | ## 调试运行 31 | 32 | + ` git clone "https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic.git"` 33 | + 解压`CrowdTracker-env`环境到`./Anaconda3/envs/`目录下 34 | + 使用编译器,例如Pycharm,调用此`CrowdTracker-env`环境,再在此根目录中创建一个`models`文件夹,将下面的模型权重压缩包解压到此文件夹中 35 | 36 | ## 提供的模型权重文件 37 | 38 | + **下载:** 由原作者提供 [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIh22rxkVDfBph2VCM?e=0Tudce?download=1) 默认需放置根目录的models文件夹下 39 | + **额外缺少的文件:** [→OneDrive](https://1drv.ms/u/s!AlYD8lJlPHCIh2xS1T_M_RBKkTIf?e=iae70F?download=1) 放置在`C:\Users\User name\.cache\torch\hub\checkpoints` 40 | 41 | ## 基础套件 42 | 43 | + `PyQt5` → 界面窗口、按钮组、阈值选择、GPU选择、文件选择与进度条 44 | + `Pytorch` → 深度学习追踪系统 45 | + `OpenCV` → 视频和摄像头追踪,播放与暂停 46 | 47 | ## 更新日志 48 | 49 | 2021.11.29 添加新分支ByteTrack-Kernel,以ByteTrack核心替换了当前的追踪核心 50 | 51 | 2022.12.12 分别将ByteTrack追踪核心与FairMot追踪核心的代码进行了精简化,各将代码拆分成了界面、视频追踪、内置摄像头追踪与外置摄像头追踪。整合了LINK2001错误修复环境。 52 | -------------------------------------------------------------------------------- /README_en.md: -------------------------------------------------------------------------------- 1 | # CrowdTracker-Pytorch(Single Camera) 2 | 3 | [简体中文](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/README.md) | [English](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/README_en.md) 4 | 5 | ## Address 
Navigation 6 | 7 | + [→Paddle edition address](https://github.com/ReverseSacle/FairMOT-Paddle-Tracker_Basic) 8 | + [→FairMOT author (GitHub)](https://github.com/ifzhang/FairMOT) 9 | 10 | ## Preview 11 | 12 | ![MOT20-01](./docs/MOT20-01.gif) 13 | 14 | ## Interface Preview 15 | 16 | ![Interface](./docs/Interface.png) 17 | 18 | ## Environment Requirements 19 | 20 | + Python 3 21 | + opencv-python 22 | + DCNv2 23 | + Tested platform → Windows 10 24 | + A pre-configured conda environment containing all required libraries [→Google Drive](https://drive.google.com/file/d/1cOELR0lXD8oJwzMne0kx_PShylMwclBA/view?usp=sharing) 25 | 26 | ## Introduction 27 | 28 | + [→Making_Introduction](https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic/blob/main/docs/Making_Introduction_en.md) 29 | 30 | ## Provided Model Weights 31 | 32 | + **Download:** provided by the original author [→Google Drive](https://drive.google.com/file/d/1PRkK0G5-I9t63cT_YgCetKSpxQEecZ7-/view?usp=sharing); place it in the `models` folder in the project root 33 | + **Extra missing file:** [→Google Drive](https://drive.google.com/file/d/1sZ0PHOtHkfAHpJ1Na4Ff0SD7NJktFKHq/view?usp=sharing); place it in `C:\Users\User name\.cache\torch\hub\checkpoints` 34 | 35 | ## Quick Start 36 | 37 | + `git clone "https://github.com/ReverseSacle/CrowdTracker-Pytorch_Basic.git"` 38 | + Unzip the `CrowdTracker-env` 7z archive into `./Anaconda3/envs/` 39 | + In an IDE such as PyCharm, select the `CrowdTracker-env` environment as the interpreter, then create a folder named `models` in the project root and unzip the model weights listed above into it. 40 | 41 | ## Core Components 42 | 43 | + `PyQt5` → the interface window, button group, threshold selector, GPU selector, file chooser and progress bar 44 | + `PyTorch` → the deep-learning tracking system 45 | + `OpenCV` → video and camera tracking, plus playing and pausing the video (a minimal sketch of such a loop follows the update record below) 46 | 47 | ## Update Record 48 | 49 | 2021.11.29 Added a new branch, ByteTrack-Kernel, which replaces the current tracking kernel with the ByteTrack kernel. 50 | 51 | 2022.12.12 Simplified the source code of both the ByteTrack and FairMOT tracking kernels, splitting each into interface, video tracking, built-in camera tracking and external camera tracking modules. Fixed the LINK2001 error in the provided compiler environment.
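
The sketch below illustrates the kind of OpenCV read-and-track loop that the `OpenCV` component above refers to. It is a minimal sketch only: `tracker` and its `update()` method are hypothetical placeholders, not this project's actual API (the real entry points live under `src/`, e.g. `video_track.py`, `built_in_camera_track.py` and `src/tracker/track.py`); only the OpenCV calls themselves are standard.

```python
# Illustrative sketch only -- `tracker` and its `update()` method are hypothetical
# placeholders for the project's real tracking entry points under src/.
import cv2

def run_video(path, tracker):
    cap = cv2.VideoCapture(path)               # a camera index (e.g. 0) also works
    while True:
        ok, frame = cap.read()                 # grab the next frame
        if not ok:                             # end of stream / camera error
            break
        boxes, ids = tracker.update(frame)     # hypothetical: boxes + track IDs
        for (x, y, w, h), tid in zip(boxes, ids):
            cv2.rectangle(frame, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 0), 2)
            cv2.putText(frame, str(tid), (int(x), int(y) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
        cv2.imshow("CrowdTracker", frame)
        if cv2.waitKey(1) & 0xFF == ord("q"):  # press q to stop playback
            break
    cap.release()
    cv2.destroyAllWindows()
```
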
52 | 53 | 54 | -------------------------------------------------------------------------------- /cython_bbox-0.1.3.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/cython_bbox-0.1.3.tar.gz -------------------------------------------------------------------------------- /docs/FairMOT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/FairMOT.png -------------------------------------------------------------------------------- /docs/Interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/Interface.png -------------------------------------------------------------------------------- /docs/MOT20-01.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/docs/MOT20-01.gif -------------------------------------------------------------------------------- /docs/Making_Introduction_cn.md: -------------------------------------------------------------------------------- 1 | 制作介绍 2 | === 3 | 4 | 思路 5 | --- 6 | 7 | + **Detection and Re_ID** 8 | 9 | ![FairMOT](./FairMOT.png) 10 | 11 | 该网络主要为目标检测和重识别一体化。 12 | 对于one-stage跟踪都是基于anchor锚,这造成了提取的特征未与对象中心对齐,例如当两个目标相互靠近时,ahchor的位置就不太准确了。 13 | 14 | 图中,用点代表目标来提高位置的准确性。此外,与以往的通过高维特征来Re_ID相比,低维特征对MOT更好,因为它的训练图像比ReID少。学习低维特征有助于减少过拟合小数据的风险,并提高跟踪的稳定性。 15 | 16 | + [Paper_地址](https://arxiv.org/abs/2004.01888) 17 | -------------------------------------------------------------------------------- /docs/Making_Introduction_en.md: -------------------------------------------------------------------------------- 1 | Making Introduction 2 | === 3 | 4 | Idea 5 | --- 6 | 7 | + **Detection and Embedding** 8 | 9 | ![FairMOT](./FairMOT.png) 10 | 11 | + One-shot MOT 12 | + Anchor-free detection 13 | + Learning Low-dimensional features 14 | 15 | + [Paper_adress](https://arxiv.org/abs/2004.01888) 16 | 17 | -------------------------------------------------------------------------------- /lib/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /lib/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. 
Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /lib/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with Pytorch 1.0 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # mdformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | ### Note 42 | Now the master branch is for pytorch 1.0 (new ATen API), you can switch back to pytorch 0.4 with, 43 | ```bash 44 | git checkout pytorch_0.4 45 | ``` 46 | 47 | ### Known Issues: 48 | 49 | - [x] Gradient check w.r.t offset (solved) 50 | - [ ] Backward is not reentrant (minor) 51 | 52 | This is an adaption of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 53 | 54 | I have ran the gradient check for many times with DOUBLE type. Every tensor **except offset** passes. 55 | However, when I set the offset to 0.5, it passes. I'm still wondering what cause this problem. Is it because some 56 | non-differential points? 57 | 58 | Update: all gradient check passes with double precision. 59 | 60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 61 | float `<1e-15` for double), 62 | so it may not be a serious problem (?) 63 | 64 | Please post an issue or PR if you have any comments. 
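
As a usage note beyond the snippets above, here is a minimal sketch of `DCN` dropped into a model in place of `nn.Conv2d`. The constructor arguments mirror the deformable-conv example in this README; the surrounding `DeformableBlock` wrapper is purely illustrative, and a CUDA build of the extension is assumed.

```python
# Illustrative sketch: DCN as a drop-in replacement for nn.Conv2d.
# Constructor arguments follow the README example above; DeformableBlock
# itself is a hypothetical wrapper, and a CUDA build is assumed.
import torch
import torch.nn as nn
from dcn_v2 import DCN

class DeformableBlock(nn.Module):
    def __init__(self, channels=64):
        super().__init__()
        # DCN generates its own offsets and masks internally ("wrap all things")
        self.conv = DCN(channels, channels, kernel_size=(3, 3),
                        stride=1, padding=1, deformable_groups=2)
        self.bn = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.conv(x)))

block = DeformableBlock(64).cuda()
out = block(torch.randn(2, 64, 128, 128).cuda())
print(out.shape)  # expected: torch.Size([2, 64, 128, 128])
```
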
65 | -------------------------------------------------------------------------------- /lib/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/DCNv2/__init__.py -------------------------------------------------------------------------------- /lib/DCNv2/_ext.cp38-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/DCNv2/_ext.cp38-win_amd64.pyd -------------------------------------------------------------------------------- /lib/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /lib/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import glob 4 | import os 5 | import sys 6 | 7 | import torch 8 | from setuptools import find_packages, setup 9 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 10 | 11 | requirements = ["torch", "torchvision"] 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "src") 17 | 18 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 19 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 21 | 22 | os.environ["CC"] = "g++" 23 | sources = main_file + source_cpu 24 | extension = CppExtension 25 | extra_compile_args = {"cxx": []} 26 | define_macros = [] 27 | 28 | if torch.cuda.is_available() and CUDA_HOME is not None: 29 | extension = CUDAExtension 30 | sources += source_cuda 31 | define_macros += [("WITH_CUDA", None)] 32 | extra_compile_args["nvcc"] = [ 33 | "-DCUDA_HAS_FP16=1", 34 | "-D__CUDA_NO_HALF_OPERATORS__", 35 | "-D__CUDA_NO_HALF_CONVERSIONS__", 36 | "-D__CUDA_NO_HALF2_OPERATORS__", 37 | ] 38 | else: 39 | # raise NotImplementedError('Cuda is not available') 40 | pass 41 | 42 | extra_compile_args['cxx'].append('-fopenmp') 43 | 44 | sources = [os.path.join(extensions_dir, s) for s in sources] 45 | include_dirs = [extensions_dir] 46 | ext_modules = [ 47 | extension( 48 | "_ext", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | return ext_modules 56 | 57 | 58 | setup( 59 | name="DCNv2", 60 | version="0.1", 61 | author="charlesshang", 62 | url="https://github.com/charlesshang/DCNv2", 63 | description="deformable convolutional networks", 64 | packages=find_packages( 65 | exclude=( 66 | "configs", 67 | "tests", 68 | ) 69 | ), 70 | # install_requires=requirements, 71 | ext_modules=get_extensions(), 72 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 73 | ) 74 | -------------------------------------------------------------------------------- /lib/DCNv2/src/cpu/dcn_v2_im2col_cpu.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 
3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | // modified from the CUDA version for CPU use by Daniel K. 
Suhendro 64 | 65 | #ifndef DCN_V2_IM2COL_CPU 66 | #define DCN_V2_IM2COL_CPU 67 | 68 | #ifdef __cplusplus 69 | extern "C" 70 | { 71 | #endif 72 | 73 | void modulated_deformable_im2col_cpu(const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cpu(const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cpu(const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 88 | const int batch_size, const int channels, const int height_im, const int width_im, 89 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 90 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 91 | const int dilation_h, const int dilation_w, 92 | const int deformable_group, 93 | float *grad_offset, float *grad_mask); 94 | 95 | #ifdef __cplusplus 96 | } 97 | #endif 98 | 99 | #endif -------------------------------------------------------------------------------- /lib/DCNv2/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 
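
How these C/C++ declarations surface in Python: `setup.py` compiles the sources into an extension module named `_ext`, and `src/vision.cpp` binds `dcn_v2_forward`, `dcn_v2_backward` and the psroi-pooling entry points. The sketch below calls that raw binding directly; the tensor shapes are made up for illustration, CPU tensors are used so the `dcn_v2_cpu_forward` path is taken, and in normal use `dcn_v2.py` wraps this call in an autograd Function rather than exposing it like this.

```python
# Minimal sketch, not repository code: calling the raw _ext binding directly.
# Run from lib/DCNv2 after `python setup.py build develop` (or with the shipped
# _ext.cp38-win_amd64.pyd on the path). Shapes are illustrative only.
import torch
import _ext  # the compiled extension defined in setup.py / src/vision.cpp

kh = kw = 3
inp    = torch.randn(1, 4, 8, 8)               # NCHW input (float32, CPU)
weight = torch.randn(4, 4, kh, kw)             # outC x inC x kH x kW
bias   = torch.zeros(4)
offset = torch.zeros(1, 2 * kh * kw, 8, 8)     # zero offsets, one deformable group
mask   = torch.ones(1, kh * kw, 8, 8)          # all-ones modulation mask

# Argument order follows dcn_v2_forward(...) in src/dcn_v2.h: kernel_h, kernel_w,
# stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, deformable_group
out = _ext.dcn_v2_forward(inp, weight, bias, offset, mask,
                          kh, kw, 1, 1, 1, 1, 1, 1, 1)
print(out.shape)  # expected: torch.Size([1, 4, 8, 8])
```
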
-------------------------------------------------------------------------------- /lib/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 
58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /lib/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const 
at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | else{ 39 | return dcn_v2_cpu_forward(input, weight, bias, offset, mask, 40 | kernel_h, kernel_w, 41 | stride_h, stride_w, 42 | pad_h, pad_w, 43 | dilation_h, dilation_w, 44 | deformable_group); 45 | } 46 | } 47 | 48 | std::vector 49 | dcn_v2_backward(const at::Tensor &input, 50 | const at::Tensor &weight, 51 | const at::Tensor &bias, 52 | const at::Tensor &offset, 53 | const at::Tensor &mask, 54 | const at::Tensor &grad_output, 55 | int kernel_h, int kernel_w, 56 | int stride_h, int stride_w, 57 | int pad_h, int pad_w, 58 | int dilation_h, int dilation_w, 59 | int deformable_group) 60 | { 61 | if (input.is_cuda()) 62 | { 63 | #ifdef WITH_CUDA 64 | return dcn_v2_cuda_backward(input, 65 | weight, 66 | bias, 67 | offset, 68 | mask, 69 | grad_output, 70 | kernel_h, kernel_w, 71 | stride_h, stride_w, 72 | pad_h, pad_w, 73 | dilation_h, dilation_w, 74 | deformable_group); 75 | #else 76 | AT_ERROR("Not compiled with GPU support"); 77 | #endif 78 | } 79 | else{ 80 | return dcn_v2_cpu_backward(input, 81 | weight, 82 | bias, 83 | offset, 84 | mask, 85 | grad_output, 86 | kernel_h, kernel_w, 87 | stride_h, stride_w, 88 | pad_h, pad_w, 89 | dilation_h, dilation_w, 90 | deformable_group); 91 | } 92 | } 93 | 94 | std::tuple 95 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 96 | const at::Tensor &bbox, 97 | const at::Tensor &trans, 98 | const int no_trans, 99 | const float spatial_scale, 100 | const int output_dim, 101 | const int group_size, 102 | const int pooled_size, 103 | const int part_size, 104 | const int sample_per_part, 105 | const float trans_std) 106 | { 107 | if (input.is_cuda()) 108 | { 109 | #ifdef WITH_CUDA 110 | return dcn_v2_psroi_pooling_cuda_forward(input, 111 | bbox, 112 | trans, 113 | no_trans, 114 | spatial_scale, 115 | output_dim, 116 | group_size, 117 | pooled_size, 118 | part_size, 119 | sample_per_part, 120 | trans_std); 121 | #else 122 | AT_ERROR("Not compiled with GPU support"); 123 | #endif 124 | } 125 | else{ 126 | return dcn_v2_psroi_pooling_cpu_forward(input, 127 | bbox, 128 | trans, 129 | no_trans, 130 | spatial_scale, 131 | output_dim, 132 | group_size, 133 | pooled_size, 134 | part_size, 135 | sample_per_part, 136 | trans_std); 137 | } 138 | } 139 | 140 | std::tuple 141 | 
dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 142 | const at::Tensor &input, 143 | const at::Tensor &bbox, 144 | const at::Tensor &trans, 145 | const at::Tensor &top_count, 146 | const int no_trans, 147 | const float spatial_scale, 148 | const int output_dim, 149 | const int group_size, 150 | const int pooled_size, 151 | const int part_size, 152 | const int sample_per_part, 153 | const float trans_std) 154 | { 155 | if (input.is_cuda()) 156 | { 157 | #ifdef WITH_CUDA 158 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 159 | input, 160 | bbox, 161 | trans, 162 | top_count, 163 | no_trans, 164 | spatial_scale, 165 | output_dim, 166 | group_size, 167 | pooled_size, 168 | part_size, 169 | sample_per_part, 170 | trans_std); 171 | #else 172 | AT_ERROR("Not compiled with GPU support"); 173 | #endif 174 | } 175 | else{ 176 | return dcn_v2_psroi_pooling_cpu_backward(out_grad, 177 | input, 178 | bbox, 179 | trans, 180 | top_count, 181 | no_trans, 182 | spatial_scale, 183 | output_dim, 184 | group_size, 185 | pooled_size, 186 | part_size, 187 | sample_per_part, 188 | trans_std); 189 | } 190 | } -------------------------------------------------------------------------------- /lib/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /lib/DCNv2/test/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division, print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import gradcheck 7 | 8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling 9 | 10 | deformable_groups = 1 11 | N, inC, inH, inW = 2, 2, 4, 4 12 | outC = 2 13 | kH, kW = 3, 3 14 | 15 | 16 | def conv_identify(weight, bias): 17 | weight.data.zero_() 18 | bias.data.zero_() 19 | o, i, h, w = weight.shape 20 | y = h // 2 21 | x = w // 2 22 | for p in range(i): 23 | for q in range(o): 24 | if p == q: 25 | weight.data[q, p, y, x] = 1.0 26 | 27 | 28 | def check_zero_offset(): 29 | conv_offset = nn.Conv2d( 30 | inC, 31 | deformable_groups * 2 * kH * kW, 32 | kernel_size=(kH, kW), 33 | stride=(1, 1), 34 | padding=(1, 1), 35 | bias=True, 36 | ).cuda() 37 | 38 | conv_mask = nn.Conv2d( 39 | inC, 40 | deformable_groups * 1 * kH * kW, 41 | kernel_size=(kH, kW), 42 | stride=(1, 1), 43 | padding=(1, 1), 44 | bias=True, 45 | ).cuda() 46 | 47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | 
print("Zero offset passed") 64 | else: 65 | print("Zero offset failed") 66 | print(input) 67 | print(output) 68 | 69 | 70 | def check_gradient_dconv(): 71 | 72 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 73 | input.requires_grad = True 74 | 75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 76 | # offset.data.zero_() 77 | # offset.data -= 0.5 78 | offset.requires_grad = True 79 | 80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 81 | # mask.data.zero_() 82 | mask.requires_grad = True 83 | mask = torch.sigmoid(mask) 84 | 85 | weight = torch.randn(outC, inC, kH, kW).cuda() 86 | weight.requires_grad = True 87 | 88 | bias = torch.rand(outC).cuda() 89 | bias.requires_grad = True 90 | 91 | stride = 1 92 | padding = 1 93 | dilation = 1 94 | 95 | print( 96 | "check_gradient_dconv: ", 97 | gradcheck( 98 | dcn_v2_conv, 99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups), 100 | eps=1e-3, 101 | atol=1e-4, 102 | rtol=1e-2, 103 | ), 104 | ) 105 | 106 | 107 | def check_pooling_zero_offset(): 108 | 109 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 110 | input[0, :, 16:26, 16:26] = 1.0 111 | input[1, :, 10:20, 20:30] = 2.0 112 | rois = ( 113 | torch.tensor( 114 | [ 115 | [0, 65, 65, 103, 103], 116 | [1, 81, 41, 119, 79], 117 | ] 118 | ) 119 | .cuda() 120 | .float() 121 | ) 122 | pooling = DCNv2Pooling( 123 | spatial_scale=1.0 / 4, 124 | pooled_size=7, 125 | output_dim=16, 126 | no_trans=True, 127 | group_size=1, 128 | trans_std=0.0, 129 | ).cuda() 130 | 131 | out = pooling(input, rois, input.new()) 132 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 133 | print(s) 134 | 135 | dpooling = DCNv2Pooling( 136 | spatial_scale=1.0 / 4, 137 | pooled_size=7, 138 | output_dim=16, 139 | no_trans=False, 140 | group_size=1, 141 | trans_std=0.0, 142 | ).cuda() 143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 144 | dout = dpooling(input, rois, offset) 145 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 146 | print(s) 147 | 148 | 149 | def check_gradient_dpooling(): 150 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01 151 | N = 4 152 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 153 | x = torch.rand((N, 1)).cuda().float() * 15 154 | y = torch.rand((N, 1)).cuda().float() * 15 155 | w = torch.rand((N, 1)).cuda().float() * 10 156 | h = torch.rand((N, 1)).cuda().float() * 10 157 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 158 | offset = torch.randn(N, 2, 3, 3).cuda() 159 | input.requires_grad = True 160 | offset.requires_grad = True 161 | 162 | spatial_scale = 1.0 / 4 163 | pooled_size = 3 164 | output_dim = 3 165 | no_trans = 0 166 | group_size = 1 167 | trans_std = 0.0 168 | sample_per_part = 4 169 | part_size = pooled_size 170 | 171 | print( 172 | "check_gradient_dpooling:", 173 | gradcheck( 174 | dcn_v2_pooling, 175 | ( 176 | input, 177 | rois, 178 | offset, 179 | spatial_scale, 180 | pooled_size, 181 | output_dim, 182 | no_trans, 183 | group_size, 184 | part_size, 185 | sample_per_part, 186 | trans_std, 187 | ), 188 | eps=1e-4, 189 | ), 190 | ) 191 | 192 | 193 | def example_dconv(): 194 | input = torch.randn(2, 64, 128, 128).cuda() 195 | # wrap all things (offset and mask) in DCN 196 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda() 197 | # print(dcn.weight.shape, input.shape) 198 | output = dcn(input) 199 | targert = output.new(*output.size()) 200 | 
targert.data.uniform_(-0.01, 0.01) 201 | error = (targert - output).mean() 202 | error.backward() 203 | print(output.shape) 204 | 205 | 206 | def example_dpooling(): 207 | input = torch.randn(2, 32, 64, 64).cuda() 208 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 209 | x = torch.randint(256, (20, 1)).cuda().float() 210 | y = torch.randint(256, (20, 1)).cuda().float() 211 | w = torch.randint(64, (20, 1)).cuda().float() 212 | h = torch.randint(64, (20, 1)).cuda().float() 213 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 214 | offset = torch.randn(20, 2, 7, 7).cuda() 215 | input.requires_grad = True 216 | offset.requires_grad = True 217 | 218 | # normal roi_align 219 | pooling = DCNv2Pooling( 220 | spatial_scale=1.0 / 4, 221 | pooled_size=7, 222 | output_dim=32, 223 | no_trans=True, 224 | group_size=1, 225 | trans_std=0.1, 226 | ).cuda() 227 | 228 | # deformable pooling 229 | dpooling = DCNv2Pooling( 230 | spatial_scale=1.0 / 4, 231 | pooled_size=7, 232 | output_dim=32, 233 | no_trans=False, 234 | group_size=1, 235 | trans_std=0.1, 236 | ).cuda() 237 | 238 | out = pooling(input, rois, offset) 239 | dout = dpooling(input, rois, offset) 240 | print(out.shape) 241 | print(dout.shape) 242 | 243 | target_out = out.new(*out.size()) 244 | target_out.data.uniform_(-0.01, 0.01) 245 | target_dout = dout.new(*dout.size()) 246 | target_dout.data.uniform_(-0.01, 0.01) 247 | e = (target_out - out).mean() 248 | e.backward() 249 | e = (target_dout - dout).mean() 250 | e.backward() 251 | 252 | 253 | def example_mdpooling(): 254 | input = torch.randn(2, 32, 64, 64).cuda() 255 | input.requires_grad = True 256 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 257 | x = torch.randint(256, (20, 1)).cuda().float() 258 | y = torch.randint(256, (20, 1)).cuda().float() 259 | w = torch.randint(64, (20, 1)).cuda().float() 260 | h = torch.randint(64, (20, 1)).cuda().float() 261 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 262 | 263 | # mdformable pooling (V2) 264 | dpooling = DCNPooling( 265 | spatial_scale=1.0 / 4, 266 | pooled_size=7, 267 | output_dim=32, 268 | no_trans=False, 269 | group_size=1, 270 | trans_std=0.1, 271 | deform_fc_dim=1024, 272 | ).cuda() 273 | 274 | dout = dpooling(input, rois) 275 | target = dout.new(*dout.size()) 276 | target.data.uniform_(-0.1, 0.1) 277 | error = (target - dout).mean() 278 | error.backward() 279 | print(dout.shape) 280 | 281 | 282 | if __name__ == "__main__": 283 | 284 | example_dconv() 285 | example_dpooling() 286 | example_mdpooling() 287 | 288 | check_pooling_zero_offset() 289 | # zero offset check 290 | if inC == outC: 291 | check_zero_offset() 292 | 293 | check_gradient_dpooling() 294 | check_gradient_dconv() 295 | # """ 296 | # ****** Note: backward is not reentrant error may not be a serious problem, 297 | # ****** since the max error is less than 1e-7, 298 | # ****** Still looking for what trigger this problem 299 | # """ 300 | -------------------------------------------------------------------------------- /lib/DCNv2/test/testcpu.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division, print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import gradcheck 7 | 8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling 9 | 10 | deformable_groups = 1 11 | N, inC, inH, inW = 2, 2, 4, 4 12 | outC = 2 13 | kH, kW = 3, 3 14 | 15 | 16 | def conv_identify(weight, 
bias): 17 | weight.data.zero_() 18 | bias.data.zero_() 19 | o, i, h, w = weight.shape 20 | y = h // 2 21 | x = w // 2 22 | for p in range(i): 23 | for q in range(o): 24 | if p == q: 25 | weight.data[q, p, y, x] = 1.0 26 | 27 | 28 | def check_zero_offset(): 29 | conv_offset = nn.Conv2d( 30 | inC, 31 | deformable_groups * 2 * kH * kW, 32 | kernel_size=(kH, kW), 33 | stride=(1, 1), 34 | padding=(1, 1), 35 | bias=True, 36 | ) 37 | 38 | conv_mask = nn.Conv2d( 39 | inC, 40 | deformable_groups * 1 * kH * kW, 41 | kernel_size=(kH, kW), 42 | stride=(1, 1), 43 | padding=(1, 1), 44 | bias=True, 45 | ) 46 | 47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups) 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW) 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print("Zero offset passed") 64 | else: 65 | print("Zero offset failed") 66 | print(input) 67 | print(output) 68 | 69 | 70 | def check_gradient_dconv(): 71 | 72 | input = torch.rand(N, inC, inH, inW) * 0.01 73 | input.requires_grad = True 74 | 75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW) * 2 76 | # offset.data.zero_() 77 | # offset.data -= 0.5 78 | offset.requires_grad = True 79 | 80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW) 81 | # mask.data.zero_() 82 | mask.requires_grad = True 83 | mask = torch.sigmoid(mask) 84 | 85 | weight = torch.randn(outC, inC, kH, kW) 86 | weight.requires_grad = True 87 | 88 | bias = torch.rand(outC) 89 | bias.requires_grad = True 90 | 91 | stride = 1 92 | padding = 1 93 | dilation = 1 94 | 95 | print( 96 | "check_gradient_dconv: ", 97 | gradcheck( 98 | dcn_v2_conv, 99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups), 100 | eps=1e-3, 101 | atol=1e-4, 102 | rtol=1e-2, 103 | ), 104 | ) 105 | 106 | 107 | def check_pooling_zero_offset(): 108 | 109 | input = torch.randn(2, 16, 64, 64).zero_() 110 | input[0, :, 16:26, 16:26] = 1.0 111 | input[1, :, 10:20, 20:30] = 2.0 112 | rois = torch.tensor( 113 | [ 114 | [0, 65, 65, 103, 103], 115 | [1, 81, 41, 119, 79], 116 | ] 117 | ).float() 118 | pooling = DCNv2Pooling( 119 | spatial_scale=1.0 / 4, 120 | pooled_size=7, 121 | output_dim=16, 122 | no_trans=True, 123 | group_size=1, 124 | trans_std=0.0, 125 | ) 126 | 127 | out = pooling(input, rois, input.new()) 128 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 129 | print(s) 130 | 131 | dpooling = DCNv2Pooling( 132 | spatial_scale=1.0 / 4, 133 | pooled_size=7, 134 | output_dim=16, 135 | no_trans=False, 136 | group_size=1, 137 | trans_std=0.0, 138 | ) 139 | offset = torch.randn(20, 2, 7, 7).zero_() 140 | dout = dpooling(input, rois, offset) 141 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 142 | print(s) 143 | 144 | 145 | def check_gradient_dpooling(): 146 | input = torch.randn(2, 3, 5, 5) * 0.01 147 | N = 4 148 | batch_inds = torch.randint(2, (N, 1)).float() 149 | x = torch.rand((N, 1)).float() * 15 150 | y = torch.rand((N, 1)).float() * 15 151 | w = torch.rand((N, 1)).float() * 10 152 | h = torch.rand((N, 1)).float() * 10 153 | rois = torch.cat((batch_inds, x, y, x + w, 
y + h), dim=1) 154 | offset = torch.randn(N, 2, 3, 3) 155 | input.requires_grad = True 156 | offset.requires_grad = True 157 | 158 | spatial_scale = 1.0 / 4 159 | pooled_size = 3 160 | output_dim = 3 161 | no_trans = 0 162 | group_size = 1 163 | trans_std = 0.0 164 | sample_per_part = 4 165 | part_size = pooled_size 166 | 167 | print( 168 | "check_gradient_dpooling:", 169 | gradcheck( 170 | dcn_v2_pooling, 171 | ( 172 | input, 173 | rois, 174 | offset, 175 | spatial_scale, 176 | pooled_size, 177 | output_dim, 178 | no_trans, 179 | group_size, 180 | part_size, 181 | sample_per_part, 182 | trans_std, 183 | ), 184 | eps=1e-4, 185 | ), 186 | ) 187 | 188 | 189 | def example_dconv(): 190 | input = torch.randn(2, 64, 128, 128) 191 | # wrap all things (offset and mask) in DCN 192 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2) 193 | # print(dcn.weight.shape, input.shape) 194 | output = dcn(input) 195 | targert = output.new(*output.size()) 196 | targert.data.uniform_(-0.01, 0.01) 197 | error = (targert - output).mean() 198 | error.backward() 199 | print(output.shape) 200 | 201 | 202 | def example_dpooling(): 203 | input = torch.randn(2, 32, 64, 64) 204 | batch_inds = torch.randint(2, (20, 1)).float() 205 | x = torch.randint(256, (20, 1)).float() 206 | y = torch.randint(256, (20, 1)).float() 207 | w = torch.randint(64, (20, 1)).float() 208 | h = torch.randint(64, (20, 1)).float() 209 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 210 | offset = torch.randn(20, 2, 7, 7) 211 | input.requires_grad = True 212 | offset.requires_grad = True 213 | 214 | # normal roi_align 215 | pooling = DCNv2Pooling( 216 | spatial_scale=1.0 / 4, 217 | pooled_size=7, 218 | output_dim=32, 219 | no_trans=True, 220 | group_size=1, 221 | trans_std=0.1, 222 | ) 223 | 224 | # deformable pooling 225 | dpooling = DCNv2Pooling( 226 | spatial_scale=1.0 / 4, 227 | pooled_size=7, 228 | output_dim=32, 229 | no_trans=False, 230 | group_size=1, 231 | trans_std=0.1, 232 | ) 233 | 234 | out = pooling(input, rois, offset) 235 | dout = dpooling(input, rois, offset) 236 | print(out.shape) 237 | print(dout.shape) 238 | 239 | target_out = out.new(*out.size()) 240 | target_out.data.uniform_(-0.01, 0.01) 241 | target_dout = dout.new(*dout.size()) 242 | target_dout.data.uniform_(-0.01, 0.01) 243 | e = (target_out - out).mean() 244 | e.backward() 245 | e = (target_dout - dout).mean() 246 | e.backward() 247 | 248 | 249 | def example_mdpooling(): 250 | input = torch.randn(2, 32, 64, 64) 251 | input.requires_grad = True 252 | batch_inds = torch.randint(2, (20, 1)).float() 253 | x = torch.randint(256, (20, 1)).float() 254 | y = torch.randint(256, (20, 1)).float() 255 | w = torch.randint(64, (20, 1)).float() 256 | h = torch.randint(64, (20, 1)).float() 257 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 258 | 259 | # mdformable pooling (V2) 260 | dpooling = DCNPooling( 261 | spatial_scale=1.0 / 4, 262 | pooled_size=7, 263 | output_dim=32, 264 | no_trans=False, 265 | group_size=1, 266 | trans_std=0.1, 267 | deform_fc_dim=1024, 268 | ) 269 | 270 | dout = dpooling(input, rois) 271 | target = dout.new(*dout.size()) 272 | target.data.uniform_(-0.1, 0.1) 273 | error = (target - dout).mean() 274 | error.backward() 275 | print(dout.shape) 276 | 277 | 278 | if __name__ == "__main__": 279 | 280 | example_dconv() 281 | example_dpooling() 282 | example_mdpooling() 283 | 284 | check_pooling_zero_offset() 285 | # zero offset check 286 | if inC == outC: 287 | check_zero_offset() 288 | 289 | 
check_gradient_dpooling() 290 | check_gradient_dconv() 291 | # """ 292 | # ****** Note: backward is not reentrant error may not be a serious problem, 293 | # ****** since the max error is less than 1e-7, 294 | # ****** Still looking for what trigger this problem 295 | # """ 296 | -------------------------------------------------------------------------------- /lib/DCNv2/test/testcuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division, print_function 3 | 4 | import torch 5 | import torch.nn as nn 6 | from torch.autograd import gradcheck 7 | 8 | from dcn_v2 import DCN, DCNPooling, DCNv2, DCNv2Pooling, dcn_v2_conv, dcn_v2_pooling 9 | 10 | deformable_groups = 1 11 | N, inC, inH, inW = 2, 2, 4, 4 12 | outC = 2 13 | kH, kW = 3, 3 14 | 15 | 16 | def conv_identify(weight, bias): 17 | weight.data.zero_() 18 | bias.data.zero_() 19 | o, i, h, w = weight.shape 20 | y = h // 2 21 | x = w // 2 22 | for p in range(i): 23 | for q in range(o): 24 | if p == q: 25 | weight.data[q, p, y, x] = 1.0 26 | 27 | 28 | def check_zero_offset(): 29 | conv_offset = nn.Conv2d( 30 | inC, 31 | deformable_groups * 2 * kH * kW, 32 | kernel_size=(kH, kW), 33 | stride=(1, 1), 34 | padding=(1, 1), 35 | bias=True, 36 | ).cuda() 37 | 38 | conv_mask = nn.Conv2d( 39 | inC, 40 | deformable_groups * 1 * kH * kW, 41 | kernel_size=(kH, kW), 42 | stride=(1, 1), 43 | padding=(1, 1), 44 | bias=True, 45 | ).cuda() 46 | 47 | dcn_v2 = DCNv2(inC, outC, (kH, kW), stride=1, padding=1, dilation=1, deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print("Zero offset passed") 64 | else: 65 | print("Zero offset failed") 66 | print(input) 67 | print(output) 68 | 69 | 70 | def check_gradient_dconv(): 71 | 72 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 73 | input.requires_grad = True 74 | 75 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 76 | # offset.data.zero_() 77 | # offset.data -= 0.5 78 | offset.requires_grad = True 79 | 80 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 81 | # mask.data.zero_() 82 | mask.requires_grad = True 83 | mask = torch.sigmoid(mask) 84 | 85 | weight = torch.randn(outC, inC, kH, kW).cuda() 86 | weight.requires_grad = True 87 | 88 | bias = torch.rand(outC).cuda() 89 | bias.requires_grad = True 90 | 91 | stride = 1 92 | padding = 1 93 | dilation = 1 94 | 95 | print( 96 | "check_gradient_dconv: ", 97 | gradcheck( 98 | dcn_v2_conv, 99 | (input, offset, mask, weight, bias, stride, padding, dilation, deformable_groups), 100 | eps=1e-3, 101 | atol=1e-4, 102 | rtol=1e-2, 103 | ), 104 | ) 105 | 106 | 107 | def check_pooling_zero_offset(): 108 | 109 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 110 | input[0, :, 16:26, 16:26] = 1.0 111 | input[1, :, 10:20, 20:30] = 2.0 112 | rois = ( 113 | torch.tensor( 114 | [ 115 | [0, 65, 65, 103, 103], 116 | [1, 81, 41, 119, 79], 117 | ] 118 | ) 119 | .cuda() 120 | .float() 121 | ) 122 | pooling = DCNv2Pooling( 123 | spatial_scale=1.0 / 4, 124 | pooled_size=7, 125 
| output_dim=16, 126 | no_trans=True, 127 | group_size=1, 128 | trans_std=0.0, 129 | ).cuda() 130 | 131 | out = pooling(input, rois, input.new()) 132 | s = ", ".join(["%f" % out[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 133 | print(s) 134 | 135 | dpooling = DCNv2Pooling( 136 | spatial_scale=1.0 / 4, 137 | pooled_size=7, 138 | output_dim=16, 139 | no_trans=False, 140 | group_size=1, 141 | trans_std=0.0, 142 | ).cuda() 143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 144 | dout = dpooling(input, rois, offset) 145 | s = ", ".join(["%f" % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 146 | print(s) 147 | 148 | 149 | def check_gradient_dpooling(): 150 | input = torch.randn(2, 3, 5, 5).cuda().float() * 0.01 151 | N = 4 152 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 153 | x = torch.rand((N, 1)).cuda().float() * 15 154 | y = torch.rand((N, 1)).cuda().float() * 15 155 | w = torch.rand((N, 1)).cuda().float() * 10 156 | h = torch.rand((N, 1)).cuda().float() * 10 157 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 158 | offset = torch.randn(N, 2, 3, 3).cuda() 159 | input.requires_grad = True 160 | offset.requires_grad = True 161 | 162 | spatial_scale = 1.0 / 4 163 | pooled_size = 3 164 | output_dim = 3 165 | no_trans = 0 166 | group_size = 1 167 | trans_std = 0.0 168 | sample_per_part = 4 169 | part_size = pooled_size 170 | 171 | print( 172 | "check_gradient_dpooling:", 173 | gradcheck( 174 | dcn_v2_pooling, 175 | ( 176 | input, 177 | rois, 178 | offset, 179 | spatial_scale, 180 | pooled_size, 181 | output_dim, 182 | no_trans, 183 | group_size, 184 | part_size, 185 | sample_per_part, 186 | trans_std, 187 | ), 188 | eps=1e-4, 189 | ), 190 | ) 191 | 192 | 193 | def example_dconv(): 194 | input = torch.randn(2, 64, 128, 128).cuda() 195 | # wrap all things (offset and mask) in DCN 196 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, padding=1, deformable_groups=2).cuda() 197 | # print(dcn.weight.shape, input.shape) 198 | output = dcn(input) 199 | targert = output.new(*output.size()) 200 | targert.data.uniform_(-0.01, 0.01) 201 | error = (targert - output).mean() 202 | error.backward() 203 | print(output.shape) 204 | 205 | 206 | def example_dpooling(): 207 | input = torch.randn(2, 32, 64, 64).cuda() 208 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 209 | x = torch.randint(256, (20, 1)).cuda().float() 210 | y = torch.randint(256, (20, 1)).cuda().float() 211 | w = torch.randint(64, (20, 1)).cuda().float() 212 | h = torch.randint(64, (20, 1)).cuda().float() 213 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 214 | offset = torch.randn(20, 2, 7, 7).cuda() 215 | input.requires_grad = True 216 | offset.requires_grad = True 217 | 218 | # normal roi_align 219 | pooling = DCNv2Pooling( 220 | spatial_scale=1.0 / 4, 221 | pooled_size=7, 222 | output_dim=32, 223 | no_trans=True, 224 | group_size=1, 225 | trans_std=0.1, 226 | ).cuda() 227 | 228 | # deformable pooling 229 | dpooling = DCNv2Pooling( 230 | spatial_scale=1.0 / 4, 231 | pooled_size=7, 232 | output_dim=32, 233 | no_trans=False, 234 | group_size=1, 235 | trans_std=0.1, 236 | ).cuda() 237 | 238 | out = pooling(input, rois, offset) 239 | dout = dpooling(input, rois, offset) 240 | print(out.shape) 241 | print(dout.shape) 242 | 243 | target_out = out.new(*out.size()) 244 | target_out.data.uniform_(-0.01, 0.01) 245 | target_dout = dout.new(*dout.size()) 246 | target_dout.data.uniform_(-0.01, 0.01) 247 | e = (target_out - out).mean() 248 | e.backward() 249 | e = (target_dout - 
dout).mean() 250 | e.backward() 251 | 252 | 253 | def example_mdpooling(): 254 | input = torch.randn(2, 32, 64, 64).cuda() 255 | input.requires_grad = True 256 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 257 | x = torch.randint(256, (20, 1)).cuda().float() 258 | y = torch.randint(256, (20, 1)).cuda().float() 259 | w = torch.randint(64, (20, 1)).cuda().float() 260 | h = torch.randint(64, (20, 1)).cuda().float() 261 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 262 | 263 | # mdformable pooling (V2) 264 | dpooling = DCNPooling( 265 | spatial_scale=1.0 / 4, 266 | pooled_size=7, 267 | output_dim=32, 268 | no_trans=False, 269 | group_size=1, 270 | trans_std=0.1, 271 | deform_fc_dim=1024, 272 | ).cuda() 273 | 274 | dout = dpooling(input, rois) 275 | target = dout.new(*dout.size()) 276 | target.data.uniform_(-0.1, 0.1) 277 | error = (target - dout).mean() 278 | error.backward() 279 | print(dout.shape) 280 | 281 | 282 | if __name__ == "__main__": 283 | 284 | example_dconv() 285 | example_dpooling() 286 | example_mdpooling() 287 | 288 | check_pooling_zero_offset() 289 | # zero offset check 290 | if inC == outC: 291 | check_zero_offset() 292 | 293 | check_gradient_dpooling() 294 | check_gradient_dconv() 295 | # """ 296 | # ****** Note: backward is not reentrant error may not be a serious problem, 297 | # ****** since the max error is less than 1e-7, 298 | # ****** Still looking for what trigger this problem 299 | # """ 300 | -------------------------------------------------------------------------------- /lib/cfg/crowdhuman.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "crowdhuman_train":"./data/crowdhuman.train", 6 | "crowdhuman_test":"./data/crowdhuman.val" 7 | }, 8 | "test_emb": 9 | { 10 | "mot15":"./data/mot15.val" 11 | }, 12 | "test": 13 | { 14 | "mot15":"./data/mot15.val" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /lib/cfg/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.all", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "mot15":"./data/mot15.val" 15 | }, 16 | "test": 17 | { 18 | "mot15":"./data/mot15.val" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /lib/cfg/data_all.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train", 6 | "caltech":"./data/caltech.all", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train", 11 | "crowdhuman_train":"./data/crowdhuman.train", 12 | "crowdhuman_val":"./data/crowdhuman.val" 13 | }, 14 | "test_emb": 15 | { 16 | "mot15":"./data/mot15.val" 17 | }, 18 | "test": 19 | { 20 | "mot15":"./data/mot15.val" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /lib/cfg/data_half.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.half", 6 | 
"caltech":"./data/caltech.all", 7 | "citypersons":"./data/citypersons.train", 8 | "cuhksysu":"./data/cuhksysu.train", 9 | "prw":"./data/prw.train", 10 | "eth":"./data/eth.train" 11 | }, 12 | "test_emb": 13 | { 14 | "mot15":"./data/mot15.val" 15 | }, 16 | "test": 17 | { 18 | "mot15":"./data/mot15.val" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /lib/cfg/mot15.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot15":"./data/mot15.train" 6 | }, 7 | "test_emb": 8 | { 9 | "mot15":"./data/mot15.train" 10 | }, 11 | "test": 12 | { 13 | "mot15":"./data/mot15.train" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /lib/cfg/mot16.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot16.train" 6 | }, 7 | "test_emb": 8 | { 9 | "mot17":"./data/mot16.train" 10 | }, 11 | "test": 12 | { 13 | "mot17":"./data/mot16.train" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /lib/cfg/mot17.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.train" 6 | }, 7 | "test_emb": 8 | { 9 | "mot17":"./data/mot17.train" 10 | }, 11 | "test": 12 | { 13 | "mot17":"./data/mot17.train" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /lib/cfg/mot17_half.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot17":"./data/mot17.half" 6 | }, 7 | "test_emb": 8 | { 9 | "mot17":"./data/mot17.emb" 10 | }, 11 | "test": 12 | { 13 | "mot17":"./data/mot17.val" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /lib/cfg/mot20.json: -------------------------------------------------------------------------------- 1 | { 2 | "root":"/home/zyf/dataset", 3 | "train": 4 | { 5 | "mot20":"./data/mot20.train" 6 | }, 7 | "test_emb": 8 | { 9 | "mot20":"./data/mot20.train" 10 | }, 11 | "test": 12 | { 13 | "mot20":"./data/mot20.train" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /lib/datasets/dataset/__pycache__/jde.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/datasets/dataset/__pycache__/jde.cpython-37.pyc -------------------------------------------------------------------------------- /lib/datasets/dataset/__pycache__/jde.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/datasets/dataset/__pycache__/jde.cpython-38.pyc -------------------------------------------------------------------------------- /lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .dataset.jde import JointDataset 6 | 7 | 8 | def 
get_dataset(dataset, task): 9 | if task == 'mot': 10 | return JointDataset 11 | else: 12 | return None 13 | 14 | -------------------------------------------------------------------------------- /lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /lib/models/__pycache__/common.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/common.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/decode.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/decode.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- 
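The cfg JSON files above only map dataset names to annotation list files under a shared root; a minimal sketch of how such a file is typically consumed together with get_dataset() from dataset_factory.py — the cfg path and the 'jde'/'mot' arguments are illustrative assumptions, not values taken from this repository's entry scripts:

import json
from lib.datasets.dataset_factory import get_dataset

with open('lib/cfg/mot17.json') as f:               # assumed path to one of the cfg files above
    data_cfg = json.load(f)
dataset_root = data_cfg['root']                     # e.g. "/home/zyf/dataset"
trainset_paths = data_cfg['train']                  # e.g. {"mot17": "./data/mot17.train"}
Dataset = get_dataset(dataset='jde', task='mot')    # returns JointDataset for the 'mot' task
# The training script would then build Dataset(dataset_root, trainset_paths, ...) from these values.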
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/__pycache__/yolo.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/__pycache__/yolo.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/common.py: -------------------------------------------------------------------------------- 1 | # YOLOv5 common modules 2 | 3 | import math 4 | from copy import copy 5 | from pathlib import Path 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import torch 10 | import torch.nn as nn 11 | from PIL import Image 12 | from torch.cuda import amp 13 | from lib.DCNv2.dcn_v2 import DCN 14 | 15 | 16 | def autopad(k, p=None): # kernel, padding 17 | # Pad to 'same' 18 | if p is None: 19 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 20 | return p 21 | 22 | 23 | def DWConv(c1, c2, k=1, s=1, act=True): 24 | # Depthwise convolution 25 | return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) 26 | 27 | 28 | class Conv(nn.Module): 29 | # Standard convolution 30 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 31 | super(Conv, self).__init__() 32 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 33 | self.bn = nn.BatchNorm2d(c2) 34 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 35 | 36 | def forward(self, x): 37 | return self.act(self.bn(self.conv(x))) 38 | 39 | def fuseforward(self, x): 40 | return self.act(self.conv(x)) 41 | 42 | 43 | class DeConv(nn.Module): 44 | # convtranspose 45 | def __init__(self, c1, c2, k=4, s=2): 46 | super(DeConv, self).__init__() 47 | self.deconv = nn.ConvTranspose2d( 48 | in_channels=c1, 49 | out_channels=c2, 50 | kernel_size=k, 51 | stride=s, 52 | padding=1, 53 | output_padding=0, 54 | bias=False) 55 | self.bn = nn.BatchNorm2d(c2) 56 | self.act = nn.SiLU() 57 | 58 | def forward(self, x): 59 | return self.act(self.bn(self.deconv(x))) 60 | 61 | 62 | def fill_up_weights(up): 63 | w = up.weight.data 64 | f = math.ceil(w.size(2) / 2) 65 | c = (2 * f - 1 - f % 2) / (2. 
* f) 66 | for i in range(w.size(2)): 67 | for j in range(w.size(3)): 68 | w[0, 0, i, j] = \ 69 | (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c)) 70 | for c in range(1, w.size(0)): 71 | w[c, 0, :, :] = w[0, 0, :, :] 72 | 73 | 74 | class DeConvDCN(nn.Module): 75 | # convtranspose with dcn 76 | def __init__(self, c1, c2, k=4, s=2): 77 | super(DeConvDCN, self).__init__() 78 | self.layers = [] 79 | dcn = DCN(c1, c2, 80 | kernel_size=(3, 3), stride=1, 81 | padding=1, dilation=1, deformable_groups=1) 82 | deconv = nn.ConvTranspose2d( 83 | in_channels=c2, 84 | out_channels=c2, 85 | kernel_size=k, 86 | stride=s, 87 | padding=1, 88 | output_padding=0, 89 | bias=False) 90 | fill_up_weights(deconv) 91 | self.layers.append(dcn) 92 | self.layers.append(nn.BatchNorm2d(c2)) 93 | self.layers.append(nn.SiLU()) 94 | self.layers.append(deconv) 95 | self.layers.append(nn.BatchNorm2d(c2)) 96 | self.layers.append(nn.SiLU()) 97 | self.layers = nn.Sequential(*self.layers) 98 | 99 | def forward(self, x): 100 | return self.layers(x) 101 | 102 | 103 | class TransformerLayer(nn.Module): 104 | # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance) 105 | def __init__(self, c, num_heads): 106 | super().__init__() 107 | self.q = nn.Linear(c, c, bias=False) 108 | self.k = nn.Linear(c, c, bias=False) 109 | self.v = nn.Linear(c, c, bias=False) 110 | self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads) 111 | self.fc1 = nn.Linear(c, c, bias=False) 112 | self.fc2 = nn.Linear(c, c, bias=False) 113 | 114 | def forward(self, x): 115 | x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x 116 | x = self.fc2(self.fc1(x)) + x 117 | return x 118 | 119 | 120 | class TransformerBlock(nn.Module): 121 | # Vision Transformer https://arxiv.org/abs/2010.11929 122 | def __init__(self, c1, c2, num_heads, num_layers): 123 | super().__init__() 124 | self.conv = None 125 | if c1 != c2: 126 | self.conv = Conv(c1, c2) 127 | self.linear = nn.Linear(c2, c2) # learnable position embedding 128 | self.tr = nn.Sequential(*[TransformerLayer(c2, num_heads) for _ in range(num_layers)]) 129 | self.c2 = c2 130 | 131 | def forward(self, x): 132 | if self.conv is not None: 133 | x = self.conv(x) 134 | b, _, w, h = x.shape 135 | p = x.flatten(2) 136 | p = p.unsqueeze(0) 137 | p = p.transpose(0, 3) 138 | p = p.squeeze(3) 139 | e = self.linear(p) 140 | x = p + e 141 | 142 | x = self.tr(x) 143 | x = x.unsqueeze(3) 144 | x = x.transpose(0, 3) 145 | x = x.reshape(b, self.c2, w, h) 146 | return x 147 | 148 | 149 | class Bottleneck(nn.Module): 150 | # Standard bottleneck 151 | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion 152 | super(Bottleneck, self).__init__() 153 | c_ = int(c2 * e) # hidden channels 154 | self.cv1 = Conv(c1, c_, 1, 1) 155 | self.cv2 = Conv(c_, c2, 3, 1, g=g) 156 | self.add = shortcut and c1 == c2 157 | 158 | def forward(self, x): 159 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 160 | 161 | 162 | class BottleneckCSP(nn.Module): 163 | # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks 164 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 165 | super(BottleneckCSP, self).__init__() 166 | c_ = int(c2 * e) # hidden channels 167 | self.cv1 = Conv(c1, c_, 1, 1) 168 | self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) 169 | self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) 170 | self.cv4 = Conv(2 * c_, c2, 1, 1) 171 | self.bn = 
nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) 172 | self.act = nn.LeakyReLU(0.1, inplace=True) 173 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 174 | 175 | def forward(self, x): 176 | y1 = self.cv3(self.m(self.cv1(x))) 177 | y2 = self.cv2(x) 178 | return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) 179 | 180 | 181 | class C3(nn.Module): 182 | # CSP Bottleneck with 3 convolutions 183 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion 184 | super(C3, self).__init__() 185 | c_ = int(c2 * e) # hidden channels 186 | self.cv1 = Conv(c1, c_, 1, 1) 187 | self.cv2 = Conv(c1, c_, 1, 1) 188 | self.cv3 = Conv(2 * c_, c2, 1) # act=FReLU(c2) 189 | self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) 190 | # self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)]) 191 | 192 | def forward(self, x): 193 | return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) 194 | 195 | 196 | class C3TR(C3): 197 | # C3 module with TransformerBlock() 198 | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): 199 | super().__init__(c1, c2, n, shortcut, g, e) 200 | c_ = int(c2 * e) 201 | self.m = TransformerBlock(c_, c_, 4, n) 202 | 203 | 204 | class SPP(nn.Module): 205 | # Spatial pyramid pooling layer used in YOLOv3-SPP 206 | def __init__(self, c1, c2, k=(5, 9, 13)): 207 | super(SPP, self).__init__() 208 | c_ = c1 // 2 # hidden channels 209 | self.cv1 = Conv(c1, c_, 1, 1) 210 | self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1) 211 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 212 | 213 | def forward(self, x): 214 | x = self.cv1(x) 215 | return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1)) 216 | 217 | 218 | class Focus(nn.Module): 219 | # Focus wh information into c-space 220 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 221 | super(Focus, self).__init__() 222 | self.conv = Conv(c1 * 4, c2, k, s, p, g, act) 223 | # self.contract = Contract(gain=2) 224 | 225 | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) 226 | return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) 227 | # return self.conv(self.contract(x)) 228 | 229 | 230 | class Contract(nn.Module): 231 | # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40) 232 | def __init__(self, gain=2): 233 | super().__init__() 234 | self.gain = gain 235 | 236 | def forward(self, x): 237 | N, C, H, W = x.size() # assert (H / s == 0) and (W / s == 0), 'Indivisible gain' 238 | s = self.gain 239 | x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2) 240 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40) 241 | return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40) 242 | 243 | 244 | class Expand(nn.Module): 245 | # Expand channels into width-height, i.e. 
x(1,64,80,80) to x(1,16,160,160) 246 | def __init__(self, gain=2): 247 | super().__init__() 248 | self.gain = gain 249 | 250 | def forward(self, x): 251 | N, C, H, W = x.size() # assert C / s ** 2 == 0, 'Indivisible gain' 252 | s = self.gain 253 | x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80) 254 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2) 255 | return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160) 256 | 257 | 258 | class Concat(nn.Module): 259 | # Concatenate a list of tensors along dimension 260 | def __init__(self, dimension=1): 261 | super(Concat, self).__init__() 262 | self.d = dimension 263 | 264 | def forward(self, x): 265 | return torch.cat(x, self.d) -------------------------------------------------------------------------------- /lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /lib/models/decode.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | from .utils import _gather_feat, _tranpose_and_gather_feat 8 | 9 | def _nms(heat, kernel=3): 10 | pad = (kernel - 1) // 2 11 | 12 | hmax = nn.functional.max_pool2d( 13 | heat, (kernel, kernel), stride=1, padding=pad) 14 | keep = (hmax == heat).float() 15 | return heat * keep 16 | 17 | 18 | def _topk_channel(scores, K=40): 19 | batch, cat, height, width = scores.size() 20 | 21 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 22 | 23 | topk_inds = topk_inds % (height * width) 24 | topk_ys = (topk_inds / width).int().float() 25 | topk_xs = (topk_inds % width).int().float() 26 | 27 | return topk_scores, topk_inds, topk_ys, topk_xs 28 | 29 | def _topk(scores, K=40): 30 | batch, cat, height, width = scores.size() 31 | 32 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 33 | 34 | topk_inds = topk_inds % (height * width) 35 | topk_ys = (topk_inds / width).int().float() 36 | topk_xs = (topk_inds % width).int().float() 37 | 38 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 39 | topk_clses = (topk_ind / K).int() 40 | topk_inds = _gather_feat( 41 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 42 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 43 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 44 | 45 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 46 | 47 | 48 | def mot_decode(heat, wh, reg=None, ltrb=False, K=100): 49 | batch, cat, height, width = heat.size() 50 | 51 | # heat = torch.sigmoid(heat) 52 | # perform nms on heatmaps 53 | heat = _nms(heat) 54 | 55 | scores, inds, clses, ys, xs = _topk(heat, K=K) 56 | if reg is not None: 57 | reg = _tranpose_and_gather_feat(reg, inds) 58 | reg = reg.view(batch, K, 2) 59 | xs = xs.view(batch, K, 1) + reg[:, :, 0:1] 60 | ys = ys.view(batch, K, 1) + reg[:, :, 1:2] 61 | else: 62 | xs = 
xs.view(batch, K, 1) + 0.5 63 | ys = ys.view(batch, K, 1) + 0.5 64 | wh = _tranpose_and_gather_feat(wh, inds) 65 | if ltrb: 66 | wh = wh.view(batch, K, 4) 67 | else: 68 | wh = wh.view(batch, K, 2) 69 | clses = clses.view(batch, K, 1).float() 70 | scores = scores.view(batch, K, 1) 71 | if ltrb: 72 | bboxes = torch.cat([xs - wh[..., 0:1], 73 | ys - wh[..., 1:2], 74 | xs + wh[..., 2:3], 75 | ys + wh[..., 3:4]], dim=2) 76 | else: 77 | bboxes = torch.cat([xs - wh[..., 0:1] / 2, 78 | ys - wh[..., 1:2] / 2, 79 | xs + wh[..., 0:1] / 2, 80 | ys + wh[..., 1:2] / 2], dim=2) 81 | detections = torch.cat([bboxes, scores, clses], dim=2) 82 | 83 | return detections, inds 84 | -------------------------------------------------------------------------------- /lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | from .networks.dlav0 import get_pose_net as get_dlav0 11 | from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 12 | from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 13 | from .networks.resnet_fpn_dcn import get_pose_net as get_pose_net_fpn_dcn 14 | from .networks.pose_hrnet import get_pose_net as get_pose_net_hrnet 15 | from .networks.pose_dla_conv import get_pose_net as get_dla_conv 16 | from .yolo import get_pose_net as get_pose_net_yolo 17 | 18 | _model_factory = { 19 | 'dlav0': get_dlav0, # default DLAup 20 | 'dla': get_dla_dcn, 21 | 'dlaconv': get_dla_conv, 22 | 'resdcn': get_pose_net_dcn, 23 | 'resfpndcn': get_pose_net_fpn_dcn, 24 | 'hrnet': get_pose_net_hrnet, 25 | 'yolo': get_pose_net_yolo 26 | } 27 | 28 | def create_model(arch, heads, head_conv): 29 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 30 | arch = arch[:arch.find('_')] if '_' in arch else arch 31 | get_model = _model_factory[arch] 32 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv) 33 | return model 34 | 35 | def load_model(model, model_path, optimizer=None, resume=False, 36 | lr=None, lr_step=None): 37 | start_epoch = 0 38 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 39 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 40 | state_dict_ = checkpoint['state_dict'] 41 | state_dict = {} 42 | 43 | # convert data_parallal to model 44 | for k in state_dict_: 45 | if k.startswith('module') and not k.startswith('module_list'): 46 | state_dict[k[7:]] = state_dict_[k] 47 | else: 48 | state_dict[k] = state_dict_[k] 49 | model_state_dict = model.state_dict() 50 | 51 | # check loaded parameters and created model parameters 52 | msg = 'If you see this, your model does not fully load the ' + \ 53 | 'pre-trained weight. Please make sure ' + \ 54 | 'you have correctly specified --arch xxx ' + \ 55 | 'or set the correct --num_classes for your own dataset.' 56 | for k in state_dict: 57 | if k in model_state_dict: 58 | if state_dict[k].shape != model_state_dict[k].shape: 59 | print('Skip loading parameter {}, required shape{}, '\ 60 | 'loaded shape{}. 
{}'.format( 61 | k, model_state_dict[k].shape, state_dict[k].shape, msg)) 62 | state_dict[k] = model_state_dict[k] 63 | else: 64 | print('Drop parameter {}.'.format(k) + msg) 65 | for k in model_state_dict: 66 | if not (k in state_dict): 67 | print('No param {}.'.format(k) + msg) 68 | state_dict[k] = model_state_dict[k] 69 | model.load_state_dict(state_dict, strict=False) 70 | 71 | # resume optimizer parameters 72 | if optimizer is not None and resume: 73 | if 'optimizer' in checkpoint: 74 | optimizer.load_state_dict(checkpoint['optimizer']) 75 | start_epoch = checkpoint['epoch'] 76 | start_lr = lr 77 | for step in lr_step: 78 | if start_epoch >= step: 79 | start_lr *= 0.1 80 | for param_group in optimizer.param_groups: 81 | param_group['lr'] = start_lr 82 | print('Resumed optimizer with start lr', start_lr) 83 | else: 84 | print('No optimizer parameters in checkpoint.') 85 | if optimizer is not None: 86 | return model, optimizer, start_epoch 87 | else: 88 | return model 89 | 90 | def save_model(path, epoch, model, optimizer=None): 91 | if isinstance(model, torch.nn.DataParallel): 92 | state_dict = model.module.state_dict() 93 | else: 94 | state_dict = model.state_dict() 95 | data = {'epoch': epoch, 96 | 'state_dict': state_dict} 97 | if not (optimizer is None): 98 | data['optimizer'] = optimizer.state_dict() 99 | torch.save(data, path) 100 | 101 | -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/dlav0.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/dlav0.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/pose_dla_conv.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_dla_conv.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/pose_dla_dcn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_dla_dcn.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/pose_hrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/pose_hrnet.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/resnet_dcn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/resnet_dcn.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/__pycache__/resnet_fpn_dcn.cpython-38.pyc: 
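A minimal usage sketch for the factory functions in lib/models/model.py shown above; the architecture string, head dimensions and checkpoint paths are assumptions chosen for illustration (FairMOT-style heads), not values read from this repository:

from lib.models.model import create_model, load_model, save_model

heads = {'hm': 1, 'wh': 4, 'id': 128, 'reg': 2}          # assumed head layout
model = create_model(arch='dla_34', heads=heads, head_conv=256)
model = load_model(model, 'models/fairmot_dla34.pth')    # tolerant loading: mismatched or missing params are skipped with a warning
save_model('models/checkpoint_epoch1.pth', epoch=1, model=model)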
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/__pycache__/resnet_fpn_dcn.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import _C as cfg 2 | from .default import update_config -------------------------------------------------------------------------------- /lib/models/networks/config/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/config/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/config/__pycache__/default.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/models/networks/config/__pycache__/default.cpython-38.pyc -------------------------------------------------------------------------------- /lib/models/networks/config/default.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | 7 | from yacs.config import CfgNode as CN 8 | 9 | 10 | _C = CN() 11 | 12 | _C.OUTPUT_DIR = '' 13 | _C.LOG_DIR = '' 14 | _C.DATA_DIR = '' 15 | _C.GPUS = (0,) 16 | _C.WORKERS = 4 17 | _C.PRINT_FREQ = 20 18 | _C.AUTO_RESUME = False 19 | _C.PIN_MEMORY = True 20 | _C.RANK = 0 21 | 22 | # Cudnn related params 23 | _C.CUDNN = CN() 24 | _C.CUDNN.BENCHMARK = True 25 | _C.CUDNN.DETERMINISTIC = False 26 | _C.CUDNN.ENABLED = True 27 | 28 | # common params for NETWORK 29 | _C.MODEL = CN() 30 | _C.MODEL.NAME = 'pose_hrnet' 31 | _C.MODEL.INIT_WEIGHTS = True 32 | _C.MODEL.PRETRAINED = '' 33 | _C.MODEL.NUM_JOINTS = 17 34 | _C.MODEL.TAG_PER_JOINT = True 35 | _C.MODEL.TARGET_TYPE = 'gaussian' 36 | _C.MODEL.IMAGE_SIZE = [256, 256] # width * height, ex: 192 * 256 37 | _C.MODEL.HEATMAP_SIZE = [64, 64] # width * height, ex: 24 * 32 38 | _C.MODEL.SIGMA = 2 39 | _C.MODEL.EXTRA = CN(new_allowed=True) 40 | 41 | _C.LOSS = CN() 42 | _C.LOSS.USE_OHKM = False 43 | _C.LOSS.TOPK = 8 44 | _C.LOSS.USE_TARGET_WEIGHT = True 45 | _C.LOSS.USE_DIFFERENT_JOINTS_WEIGHT = False 46 | 47 | # DATASET related params 48 | _C.DATASET = CN() 49 | _C.DATASET.ROOT = '' 50 | _C.DATASET.DATASET = 'mpii' 51 | _C.DATASET.TRAIN_SET = 'train' 52 | _C.DATASET.TEST_SET = 'valid' 53 | _C.DATASET.DATA_FORMAT = 'jpg' 54 | _C.DATASET.HYBRID_JOINTS_TYPE = '' 55 | _C.DATASET.SELECT_DATA = False 56 | 57 | # training data augmentation 58 | _C.DATASET.FLIP = True 59 | _C.DATASET.SCALE_FACTOR = 0.25 60 | _C.DATASET.ROT_FACTOR = 30 61 | _C.DATASET.PROB_HALF_BODY = 0.0 62 | _C.DATASET.NUM_JOINTS_HALF_BODY = 8 63 | _C.DATASET.COLOR_RGB = False 64 | 65 | # train 66 | _C.TRAIN = CN() 67 | 68 | _C.TRAIN.LR_FACTOR = 0.1 69 | _C.TRAIN.LR_STEP = [90, 110] 70 | _C.TRAIN.LR = 0.001 71 | 72 | _C.TRAIN.OPTIMIZER = 'adam' 73 | _C.TRAIN.MOMENTUM = 0.9 74 | _C.TRAIN.WD = 0.0001 75 | _C.TRAIN.NESTEROV = False 76 | 
_C.TRAIN.GAMMA1 = 0.99 77 | _C.TRAIN.GAMMA2 = 0.0 78 | 79 | _C.TRAIN.BEGIN_EPOCH = 0 80 | _C.TRAIN.END_EPOCH = 140 81 | 82 | _C.TRAIN.RESUME = False 83 | _C.TRAIN.CHECKPOINT = '' 84 | 85 | _C.TRAIN.BATCH_SIZE_PER_GPU = 32 86 | _C.TRAIN.SHUFFLE = True 87 | 88 | # testing 89 | _C.TEST = CN() 90 | 91 | # size of images for each device 92 | _C.TEST.BATCH_SIZE_PER_GPU = 32 93 | # Test Model Epoch 94 | _C.TEST.FLIP_TEST = False 95 | _C.TEST.POST_PROCESS = False 96 | _C.TEST.SHIFT_HEATMAP = False 97 | 98 | _C.TEST.USE_GT_BBOX = False 99 | 100 | # nms 101 | _C.TEST.IMAGE_THRE = 0.1 102 | _C.TEST.NMS_THRE = 0.6 103 | _C.TEST.SOFT_NMS = False 104 | _C.TEST.OKS_THRE = 0.5 105 | _C.TEST.IN_VIS_THRE = 0.0 106 | _C.TEST.COCO_BBOX_FILE = '' 107 | _C.TEST.BBOX_THRE = 1.0 108 | _C.TEST.MODEL_FILE = '' 109 | 110 | # debug 111 | _C.DEBUG = CN() 112 | _C.DEBUG.DEBUG = False 113 | _C.DEBUG.SAVE_BATCH_IMAGES_GT = False 114 | _C.DEBUG.SAVE_BATCH_IMAGES_PRED = False 115 | _C.DEBUG.SAVE_HEATMAPS_GT = False 116 | _C.DEBUG.SAVE_HEATMAPS_PRED = False 117 | 118 | 119 | def update_config(cfg, cfg_dir): 120 | cfg.defrost() 121 | cfg.merge_from_file(cfg_dir) 122 | cfg.freeze() 123 | 124 | 125 | if __name__ == '__main__': 126 | import sys 127 | with open(sys.argv[1], 'w') as f: 128 | print(_C, file=f) -------------------------------------------------------------------------------- /lib/models/networks/config/hrnet_w18.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '../models/hrnetv2_w18_imagenet_pretrained.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 18 61 | - 36 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 18 73 | - 36 74 | - 72 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 18 87 | - 36 88 | - 72 89 | - 144 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 
| IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true -------------------------------------------------------------------------------- /lib/models/networks/config/hrnet_w32.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: true 2 | CUDNN: 3 | BENCHMARK: true 4 | DETERMINISTIC: false 5 | ENABLED: true 6 | DATA_DIR: '' 7 | GPUS: (0,1,2,3) 8 | OUTPUT_DIR: 'output' 9 | LOG_DIR: 'log' 10 | WORKERS: 24 11 | PRINT_FREQ: 100 12 | 13 | DATASET: 14 | COLOR_RGB: true 15 | DATASET: 'coco' 16 | DATA_FORMAT: jpg 17 | FLIP: true 18 | NUM_JOINTS_HALF_BODY: 8 19 | PROB_HALF_BODY: 0.3 20 | ROOT: 'data/coco/' 21 | ROT_FACTOR: 45 22 | SCALE_FACTOR: 0.35 23 | TEST_SET: 'val2017' 24 | TRAIN_SET: 'train2017' 25 | MODEL: 26 | INIT_WEIGHTS: true 27 | NAME: pose_hrnet 28 | NUM_JOINTS: 17 29 | PRETRAINED: '../models/hrnetv2_w32_imagenet_pretrained.pth' 30 | TARGET_TYPE: gaussian 31 | IMAGE_SIZE: 32 | - 192 33 | - 256 34 | HEATMAP_SIZE: 35 | - 48 36 | - 64 37 | SIGMA: 2 38 | EXTRA: 39 | PRETRAINED_LAYERS: 40 | - 'conv1' 41 | - 'bn1' 42 | - 'conv2' 43 | - 'bn2' 44 | - 'layer1' 45 | - 'transition1' 46 | - 'stage2' 47 | - 'transition2' 48 | - 'stage3' 49 | - 'transition3' 50 | - 'stage4' 51 | FINAL_CONV_KERNEL: 1 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | LOSS: 92 | USE_TARGET_WEIGHT: true 93 | TRAIN: 94 | BATCH_SIZE_PER_GPU: 32 95 | SHUFFLE: true 96 | BEGIN_EPOCH: 0 97 | END_EPOCH: 210 98 | OPTIMIZER: adam 99 | LR: 0.001 100 | LR_FACTOR: 0.1 101 | LR_STEP: 102 | - 170 103 | - 200 104 | WD: 0.0001 105 | GAMMA1: 0.99 106 | GAMMA2: 0.0 107 | MOMENTUM: 0.9 108 | NESTEROV: false 109 | TEST: 110 | BATCH_SIZE_PER_GPU: 32 111 | COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' 112 | BBOX_THRE: 1.0 113 | IMAGE_THRE: 0.0 114 | IN_VIS_THRE: 0.2 115 | MODEL_FILE: '' 116 | NMS_THRE: 1.0 117 | OKS_THRE: 0.9 118 | USE_GT_BBOX: true 119 | FLIP_TEST: true 120 | POST_PROCESS: true 121 | SHIFT_HEATMAP: true 122 | DEBUG: 123 | DEBUG: true 124 | SAVE_BATCH_IMAGES_GT: true 125 | SAVE_BATCH_IMAGES_PRED: true 126 | SAVE_HEATMAPS_GT: true 127 | SAVE_HEATMAPS_PRED: true -------------------------------------------------------------------------------- /lib/models/networks/config/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 80 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | 7 | # YOLOv5 backbone 8 | backbone: 9 | # [from, number, module, args] 10 | [[-1, 1, Focus, [64, 3]], # 0-P1/2 11 | [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 12 | [-1, 3, C3, [128]], 13 | [-1, 1, Conv, [256, 3, 2]], # 
3-P3/8 14 | [-1, 9, C3, [256]], 15 | [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 16 | [-1, 9, C3, [512]], 17 | [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 18 | [-1, 1, SPP, [1024, [5, 9, 13]]], 19 | [-1, 3, C3, [1024, False]], # 9 20 | 21 | [ -1, 1, Conv, [ 512, 1, 1 ] ], # 10 upsample /16 22 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 23 | [ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4 24 | [ -1, 3, C3, [ 512, False ] ], # 13 25 | 26 | [ -1, 1, Conv, [ 256, 1, 1 ] ], # 14 upsample /8 27 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 28 | [ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3 29 | [ -1, 3, C3, [ 256, False ] ], # 17 30 | 31 | [ -1, 1, Conv, [ 128, 1, 1 ] ], # 18 upsample /4 32 | [ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ], 33 | [ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2 34 | [ -1, 3, C3, [ 128, False ] ] # 21 35 | ] 36 | 37 | -------------------------------------------------------------------------------- /lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 
| return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /lib/models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import sys 4 | import os 5 | from copy import deepcopy 6 | from pathlib import Path 7 | 8 | from .common import * 9 | 10 | 11 | class Detect(nn.Module): 12 | stride = None # strides computed during build 13 | onnx_dynamic = False # ONNX export parameter 14 | 15 | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detection layer 16 | super(Detect, self).__init__() 17 | self.nc = nc # number of classes 18 | self.no = nc + 5 # number of outputs per anchor 19 | self.nl = len(anchors) # number of detection layers 20 | self.na = len(anchors[0]) // 2 # number of anchors 21 | self.grid = [torch.zeros(1)] * self.nl # init grid 22 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 23 | self.register_buffer('anchors', a) # shape(nl,na,2) 24 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 25 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 26 | self.inplace = inplace # use in-place ops (e.g. slice assignment) 27 | 28 | def forward(self, x): 29 | # x = x.copy() # for profiling 30 | z = [] # inference output 31 | for i in range(self.nl): 32 | x[i] = self.m[i](x[i]) # conv 33 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 34 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 35 | 36 | if not self.training: # inference 37 | if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: 38 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 39 | 40 | y = x[i].sigmoid() 41 | if self.inplace: 42 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 43 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 44 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 45 | xy = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * self.stride[i] # xy 46 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 47 | y = torch.cat((xy, wh, y[..., 4:]), -1) 48 | z.append(y.view(bs, -1, self.no)) 49 | 50 | return x if self.training else (torch.cat(z, 1), x) 51 | 52 | @staticmethod 53 | def _make_grid(nx=20, ny=20): 54 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 55 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 56 | 57 | 58 | def fill_fc_weights(layers): 59 | for m in layers.modules(): 60 | if isinstance(m, nn.Conv2d): 61 | if m.bias is not None: 62 | nn.init.constant_(m.bias, 0) 63 | 64 | 65 | class Model(nn.Module): 66 | def __init__(self, config='config/yolov5s.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 67 | super(Model, self).__init__() 68 | print(config) 69 | if isinstance(config, dict): 70 | self.yaml = config # model dict 71 | else: # is *.yaml 72 | import yaml # for torch hub 73 | self.yaml_file = Path(config).name 74 | with open(config) as f: 75 | self.yaml = yaml.safe_load(f) # model dict 76 | 77 | # Define model 78 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 79 | if nc and nc != self.yaml['nc']: 80 | self.yaml['nc'] = nc # override yaml value 81 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 82 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 83 | self.inplace = self.yaml.get('inplace', True) 84 | # logger.info([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 85 | 86 | def forward(self, x, augment=False, profile=False): 87 | return self.forward_once(x, profile) # single-scale inference, train 88 | 89 | def forward_once(self, x, profile=False): 90 | y, dt = [], [] # outputs 91 | for m in self.model: 92 | if m.f != -1: # if not from previous layer 93 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 94 | 95 | x = m(x) # run 96 | y.append(x if m.i in self.save else None) # save output 97 | 98 | return x 99 | 100 | 101 | def parse_model(d, ch): # model_dict, input_channels(3) 102 | nc, gd, gw = d['nc'], d['depth_multiple'], d['width_multiple'] 103 | 104 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 105 | for i, (f, n, m, args) in enumerate(d['backbone']): # from, number, module, args 106 | m = eval(m) if isinstance(m, str) else m # eval strings 107 | for j, a in enumerate(args): 108 | try: 109 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 110 | except: 111 | pass 112 | 113 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 114 | if m in [Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, C3TR, DeConv, DeConvDCN]: 115 | c1, c2 = ch[f], args[0] 116 | c2 = make_divisible(c2 * gw, 8) 117 | 118 | args = [c1, c2, *args[1:]] 119 | if m in [BottleneckCSP, C3, C3TR]: 120 | args.insert(2, n) # number of repeats 121 | n = 1 122 | elif m is nn.BatchNorm2d: 123 | args = [ch[f]] 124 | elif m is Concat: 125 | c2 = sum([ch[x] for x in f]) 126 | elif m is Contract: 127 | c2 = ch[f] * args[0] ** 2 128 | elif m is Expand: 129 | c2 = ch[f] // args[0] ** 2 130 | else: 131 | c2 = ch[f] 132 | 133 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 134 | t = str(m)[8:-2].replace('__main__.', '') # module type 135 | np = sum([x.numel() for x in m_.parameters()]) # number params 136 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 137 | save.extend(x 
% i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 138 | layers.append(m_) 139 | if i == 0: 140 | ch = [] 141 | ch.append(c2) 142 | return nn.Sequential(*layers), sorted(save) 143 | 144 | 145 | class PoseYOLOv5s(nn.Module): 146 | def __init__(self, heads, config_file): 147 | self.heads = heads 148 | super(PoseYOLOv5s, self).__init__() 149 | self.backbone = Model(config_file) 150 | for head in sorted(self.heads): 151 | num_output = self.heads[head] 152 | fc = nn.Sequential( 153 | nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=True), 154 | nn.SiLU(), 155 | nn.Conv2d(64, num_output, kernel_size=1, stride=1, padding=0)) 156 | self.__setattr__(head, fc) 157 | if 'hm' in head: 158 | fc[-1].bias.data.fill_(-2.19) 159 | else: 160 | fill_fc_weights(fc) 161 | 162 | def forward(self, x): 163 | x = self.backbone(x) 164 | ret = {} 165 | for head in self.heads: 166 | ret[head] = self.__getattr__(head)(x) 167 | return [ret] 168 | 169 | 170 | def get_pose_net(num_layers, heads, head_conv): 171 | config_file = '../src/lib/models/networks/config/yolov5s.yaml' 172 | pretrained = '../models/yolov5s.pt' 173 | model = PoseYOLOv5s(heads, config_file) 174 | initialize_weights(model, pretrained) 175 | return model 176 | 177 | 178 | def intersect_dicts(da, db, exclude=()): 179 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 180 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 181 | 182 | 183 | def initialize_weights(model, pretrained=''): 184 | for i, m in enumerate(model.modules()): 185 | t = type(m) 186 | if t is nn.Conv2d: 187 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 188 | elif t is nn.BatchNorm2d: 189 | m.eps = 1e-3 190 | m.momentum = 0.03 191 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 192 | m.inplace = True 193 | 194 | for head in model.heads: 195 | final_layer = model.__getattr__(head) 196 | for i, m in enumerate(final_layer.modules()): 197 | if isinstance(m, nn.Conv2d): 198 | if m.weight.shape[0] == model.heads[head]: 199 | if 'hm' in head: 200 | nn.init.constant_(m.bias, -2.19) 201 | else: 202 | nn.init.normal_(m.weight, std=0.001) 203 | nn.init.constant_(m.bias, 0) 204 | 205 | if os.path.isfile(pretrained): 206 | ckpt = torch.load(pretrained) # load checkpoint 207 | state_dict = ckpt['model'].float().state_dict() # to FP32 208 | state_dict = intersect_dicts(state_dict, model.backbone.state_dict()) # intersect 209 | model.backbone.load_state_dict(state_dict, strict=False) # load 210 | print('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), pretrained)) # report 211 | 212 | 213 | def make_divisible(x, divisor): 214 | # Returns x evenly divisible by divisor 215 | return math.ceil(x / divisor) * divisor -------------------------------------------------------------------------------- /lib/tracker/__pycache__/basetrack.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/basetrack.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracker/__pycache__/matching.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/matching.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracker/__pycache__/multitracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracker/__pycache__/multitracker.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class TrackState(object): 6 | New = 0 7 | Tracked = 1 8 | Lost = 2 9 | Removed = 3 10 | 11 | 12 | class BaseTrack(object): 13 | _count = 0 14 | 15 | track_id = 0 16 | is_activated = False 17 | state = TrackState.New 18 | 19 | history = OrderedDict() 20 | features = [] 21 | curr_feature = None 22 | score = 0 23 | start_frame = 0 24 | frame_id = 0 25 | time_since_update = 0 26 | 27 | # multi-camera 28 | location = (np.inf, np.inf) 29 | 30 | @property 31 | def end_frame(self): 32 | return self.frame_id 33 | 34 | @staticmethod 35 | def next_id(): 36 | BaseTrack._count += 1 37 | return BaseTrack._count 38 | 39 | def activate(self, *args): 40 | raise NotImplementedError 41 | 42 | def predict(self): 43 | raise NotImplementedError 44 | 45 | def update(self, *args, **kwargs): 46 | raise NotImplementedError 47 | 48 | def mark_lost(self): 49 | self.state = TrackState.Lost 50 | 51 | def mark_removed(self): 52 | self.state = TrackState.Removed -------------------------------------------------------------------------------- /lib/tracker/matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import scipy 4 | import lap 5 | from scipy.spatial.distance import cdist 6 | 7 | from cython_bbox import bbox_overlaps as bbox_ious 8 | from lib.tracking_utils import kalman_filter 9 | import time 10 | 11 | def merge_matches(m1, m2, shape): 12 | O,P,Q = shape 13 | m1 = np.asarray(m1) 14 | m2 = np.asarray(m2) 15 | 16 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) 17 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) 18 | 19 | mask = M1*M2 20 | match = mask.nonzero() 21 | match = list(zip(match[0], match[1])) 22 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) 23 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) 24 | 25 | return match, unmatched_O, unmatched_Q 26 | 27 | 28 | def _indices_to_matches(cost_matrix, indices, thresh): 29 | matched_cost = cost_matrix[tuple(zip(*indices))] 30 | matched_mask = (matched_cost <= thresh) 31 | 32 | matches = indices[matched_mask] 33 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) 34 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) 35 | 36 | return matches, unmatched_a, unmatched_b 37 | 38 | 39 | def linear_assignment(cost_matrix, thresh): 40 | if cost_matrix.size == 0: 41 | return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) 42 | matches, unmatched_a, unmatched_b = [], [], [] 43 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) 44 | for ix, mx in enumerate(x): 45 | 
if mx >= 0: 46 | matches.append([ix, mx]) 47 | unmatched_a = np.where(x < 0)[0] 48 | unmatched_b = np.where(y < 0)[0] 49 | matches = np.asarray(matches) 50 | return matches, unmatched_a, unmatched_b 51 | 52 | 53 | def ious(atlbrs, btlbrs): 54 | """ 55 | Compute cost based on IoU 56 | :type atlbrs: list[tlbr] | np.ndarray 57 | :type btlbrs: list[tlbr] | np.ndarray 58 | 59 | :rtype ious np.ndarray 60 | """ 61 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) 62 | if ious.size == 0: 63 | return ious 64 | 65 | ious = bbox_ious( 66 | np.ascontiguousarray(atlbrs, dtype=np.float), 67 | np.ascontiguousarray(btlbrs, dtype=np.float) 68 | ) 69 | 70 | return ious 71 | 72 | 73 | def iou_distance(atracks, btracks): 74 | """ 75 | Compute cost based on IoU 76 | :type atracks: list[STrack] 77 | :type btracks: list[STrack] 78 | 79 | :rtype cost_matrix np.ndarray 80 | """ 81 | 82 | if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): 83 | atlbrs = atracks 84 | btlbrs = btracks 85 | else: 86 | atlbrs = [track.tlbr for track in atracks] 87 | btlbrs = [track.tlbr for track in btracks] 88 | _ious = ious(atlbrs, btlbrs) 89 | cost_matrix = 1 - _ious 90 | 91 | return cost_matrix 92 | 93 | def embedding_distance(tracks, detections, metric='cosine'): 94 | """ 95 | :param tracks: list[STrack] 96 | :param detections: list[BaseTrack] 97 | :param metric: 98 | :return: cost_matrix np.ndarray 99 | """ 100 | 101 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) 102 | if cost_matrix.size == 0: 103 | return cost_matrix 104 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) 105 | #for i, track in enumerate(tracks): 106 | #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 107 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) 108 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Normalized features 109 | return cost_matrix 110 | 111 | 112 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 113 | if cost_matrix.size == 0: 114 | return cost_matrix 115 | gating_dim = 2 if only_position else 4 116 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 117 | measurements = np.asarray([det.to_xyah() for det in detections]) 118 | for row, track in enumerate(tracks): 119 | gating_distance = kf.gating_distance( 120 | track.mean, track.covariance, measurements, only_position) 121 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 122 | return cost_matrix 123 | 124 | 125 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): 126 | if cost_matrix.size == 0: 127 | return cost_matrix 128 | gating_dim = 2 if only_position else 4 129 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 130 | measurements = np.asarray([det.to_xyah() for det in detections]) 131 | for row, track in enumerate(tracks): 132 | gating_distance = kf.gating_distance( 133 | track.mean, track.covariance, measurements, only_position, metric='maha') 134 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 135 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance 136 | return cost_matrix 137 | -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/evaluation.cpython-38.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/evaluation.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/io.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/io.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/kalman_filter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/kalman_filter.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/log.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/log.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/timer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/timer.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/__pycache__/visualization.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/tracking_utils/__pycache__/visualization.cpython-38.pyc -------------------------------------------------------------------------------- /lib/tracking_utils/evaluation.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import copy 4 | import motmetrics as mm 5 | mm.lap.default_solver = 'lap' 6 | 7 | from lib.tracking_utils.io import read_results, unzip_objs 8 | 9 | 10 | class Evaluator(object): 11 | 12 | def __init__(self, data_root, seq_name, data_type): 13 | self.data_root = data_root 14 | self.seq_name = seq_name 15 | self.data_type = data_type 16 | 17 | self.load_annotations() 18 | self.reset_accumulator() 19 | 20 | def load_annotations(self): 21 | assert self.data_type == 'mot' 22 | 23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') 24 | self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def 
reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 83 | frames = sorted(list(set(result_frame_dict.keys()))) 84 | for frame_id in frames: 85 | trk_objs = result_frame_dict.get(frame_id, []) 86 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 87 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 88 | 89 | return self.acc 90 | 91 | @staticmethod 92 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 93 | names = copy.deepcopy(names) 94 | if metrics is None: 95 | metrics = mm.metrics.motchallenge_metrics 96 | metrics = copy.deepcopy(metrics) 97 | 98 | mh = mm.metrics.create() 99 | summary = mh.compute_many( 100 | accs, 101 | metrics=metrics, 102 | names=names, 103 | generate_overall=True 104 | ) 105 | 106 | return summary 107 | 108 | @staticmethod 109 | def save_summary(summary, filename): 110 | import pandas as pd 111 | writer = pd.ExcelWriter(filename) 112 | summary.to_excel(writer) 113 | writer.save() 114 | -------------------------------------------------------------------------------- /lib/tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from lib.tracking_utils.log import logger 6 | 7 | 8 | def write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = 
os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | x1, y1, w, h = tlwh 30 | x2, y2 = x1 + w, y1 + h 31 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) 32 | f.write(line) 33 | logger.info('Save results to {}'.format(filename)) 34 | 35 | 36 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 37 | if data_type in ('mot', 'lab'): 38 | read_fun = read_mot_results 39 | else: 40 | raise ValueError('Unknown data type: {}'.format(data_type)) 41 | 42 | return read_fun(filename, is_gt, is_ignore) 43 | 44 | 45 | """ 46 | labels={'ped', ... % 1 47 | 'person_on_vhcl', ... % 2 48 | 'car', ... % 3 49 | 'bicycle', ... % 4 50 | 'mbike', ... % 5 51 | 'non_mot_vhcl', ... % 6 52 | 'static_person', ... % 7 53 | 'distractor', ... % 8 54 | 'occluder', ... % 9 55 | 'occluder_on_grnd', ... %10 56 | 'occluder_full', ... % 11 57 | 'reflection', ... % 12 58 | 'crowd' ... % 13 59 | }; 60 | """ 61 | 62 | 63 | def read_mot_results(filename, is_gt, is_ignore): 64 | valid_labels = {1} 65 | ignore_labels = {2, 7, 8, 12} 66 | results_dict = dict() 67 | if os.path.isfile(filename): 68 | with open(filename, 'r') as f: 69 | for line in f.readlines(): 70 | linelist = line.split(',') 71 | if len(linelist) < 7: 72 | continue 73 | fid = int(linelist[0]) 74 | if fid < 1: 75 | continue 76 | results_dict.setdefault(fid, list()) 77 | 78 | box_size = float(linelist[4]) * float(linelist[5]) 79 | 80 | if is_gt: 81 | if 'MOT16-' in filename or 'MOT17-' in filename: 82 | label = int(float(linelist[7])) 83 | mark = int(float(linelist[6])) 84 | if mark == 0 or label not in valid_labels: 85 | continue 86 | score = 1 87 | elif is_ignore: 88 | if 'MOT16-' in filename or 'MOT17-' in filename: 89 | label = int(float(linelist[7])) 90 | vis_ratio = float(linelist[8]) 91 | if label not in ignore_labels and vis_ratio >= 0: 92 | continue 93 | else: 94 | continue 95 | score = 1 96 | else: 97 | score = float(linelist[6]) 98 | 99 | #if box_size > 7000: 100 | #if box_size <= 7000 or box_size >= 15000: 101 | #if box_size < 15000: 102 | #continue 103 | 104 | tlwh = tuple(map(float, linelist[2:6])) 105 | target_id = int(linelist[1]) 106 | 107 | results_dict[fid].append((tlwh, target_id, score)) 108 | 109 | return results_dict 110 | 111 | 112 | def unzip_objs(objs): 113 | if len(objs) > 0: 114 | tlwhs, ids, scores = zip(*objs) 115 | else: 116 | tlwhs, ids, scores = [], [], [] 117 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 118 | 119 | return tlwhs, ids, scores -------------------------------------------------------------------------------- /lib/tracking_utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 
| 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | return logger 16 | 17 | 18 | logger = get_logger('root') 19 | -------------------------------------------------------------------------------- /lib/tracking_utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from lib.tracking_utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppression""" 8 | -------------------------------------------------------------------------------- /lib/tracking_utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /lib/tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | self.duration = 0. 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.diff = time.time() - self.start_time 29 | self.total_time += self.diff 30 | self.calls += 1 31 | self.average_time = self.total_time / self.calls 32 | if average: 33 | self.duration = self.average_time 34 | else: 35 | self.duration = self.diff 36 | return self.duration 37 | 38 | def clear(self): 39 | self.total_time = 0. 40 | self.calls = 0 41 | self.start_time = 0. 42 | self.diff = 0. 43 | self.average_time = 0. 44 | self.duration = 0.
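# Illustrative usage sketch (a hypothetical snippet, mirroring how src/built_in_camera_track.py uses this Timer to report a running FPS):
#
#     timer = Timer()
#     timer.tic()                            # mark the start of work on one frame
#     online_targets = tracker.update(blob, frame)   # placeholder for the per-frame detection + association step
#     timer.toc()                            # accumulate the elapsed time for this frame
#     fps = 1. / timer.average_time          # running-average FPS over all processed frames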
45 | 46 | -------------------------------------------------------------------------------- /lib/tracking_utils/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def tlwhs_to_tlbrs(tlwhs): 6 | tlbrs = np.copy(tlwhs) 7 | if len(tlbrs) == 0: 8 | return tlbrs 9 | tlbrs[:, 2] += tlwhs[:, 0] 10 | tlbrs[:, 3] += tlwhs[:, 1] 11 | return tlbrs 12 | 13 | 14 | def get_color(idx): 15 | idx = idx * 3 16 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 17 | 18 | return color 19 | 20 | 21 | def resize_image(image, max_size=800): 22 | if max(image.shape[:2]) > max_size: 23 | scale = float(max_size) / max(image.shape[:2]) 24 | image = cv2.resize(image, None, fx=scale, fy=scale) 25 | return image 26 | 27 | 28 | def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): 29 | im = np.ascontiguousarray(np.copy(image)) 30 | im_h, im_w = im.shape[:2] 31 | 32 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 33 | 34 | text_scale = max(1, image.shape[1] / 1600.) 35 | text_thickness = 2 36 | line_thickness = max(1, int(image.shape[1] / 500.)) 37 | 38 | radius = max(5, int(im_w/140.)) 39 | cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), 40 | (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), thickness=2) 41 | 42 | for i, tlwh in enumerate(tlwhs): 43 | x1, y1, w, h = tlwh 44 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 45 | obj_id = int(obj_ids[i]) 46 | id_text = '{}'.format(int(obj_id)) 47 | if ids2 is not None: 48 | id_text = id_text + ', {}'.format(int(ids2[i])) 49 | _line_thickness = 1 if obj_id <= 0 else line_thickness 50 | color = get_color(abs(obj_id)) 51 | cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) 52 | cv2.putText(im, id_text, (intbox[0], intbox[1] + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), 53 | thickness=text_thickness) 54 | return im 55 | 56 | 57 | def plot_trajectory(image, tlwhs, track_ids): 58 | image = image.copy() 59 | for one_tlwhs, track_id in zip(tlwhs, track_ids): 60 | color = get_color(int(track_id)) 61 | for tlwh in one_tlwhs: 62 | x1, y1, w, h = tuple(map(int, tlwh)) 63 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) 64 | 65 | return image 66 | 67 | 68 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): 69 | im = np.copy(image) 70 | text_scale = max(1, image.shape[1] / 800.) 
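# (Descriptive note: text_scale above and the thickness chosen below are derived from the image width, so detection overlays stay legible at different input resolutions.)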
71 | thickness = 2 if text_scale > 1.3 else 1 72 | for i, det in enumerate(tlbrs): 73 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) 74 | if len(det) >= 7: 75 | label = 'det' if det[5] > 0 else 'trk' 76 | if ids is not None: 77 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) 78 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 79 | thickness=thickness) 80 | else: 81 | text = '{}# {:.2f}'.format(label, det[6]) 82 | 83 | if scores is not None: 84 | text = '{:.2f}'.format(scores[i]) 85 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), 86 | thickness=thickness) 87 | 88 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) 89 | 90 | return im 91 | -------------------------------------------------------------------------------- /lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from lib.models.data_parallel import DataParallel 9 | from lib.utils.utils import AverageMeter 10 | 11 | 12 | class ModleWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModleWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | class BaseTrainer(object): 24 | def __init__( 25 | self, opt, model, optimizer=None): 26 | self.opt = opt 27 | self.optimizer = optimizer 28 | self.loss_stats, self.loss = self._get_losses(opt) 29 | self.model_with_loss = ModleWithLoss(model, self.loss) 30 | self.optimizer.add_param_group({'params': self.loss.parameters()}) 31 | 32 | def set_device(self, gpus, chunk_sizes, device): 33 | if len(gpus) > 1: 34 | self.model_with_loss = DataParallel( 35 | self.model_with_loss, device_ids=gpus, 36 | chunk_sizes=chunk_sizes).to(device) 37 | else: 38 | self.model_with_loss = self.model_with_loss.to(device) 39 | 40 | for state in self.optimizer.state.values(): 41 | for k, v in state.items(): 42 | if isinstance(v, torch.Tensor): 43 | state[k] = v.to(device=device, non_blocking=True) 44 | 45 | def run_epoch(self, phase, epoch, data_loader): 46 | model_with_loss = self.model_with_loss 47 | if phase == 'train': 48 | model_with_loss.train() 49 | else: 50 | if len(self.opt.gpus) > 1: 51 | model_with_loss = self.model_with_loss.module 52 | model_with_loss.eval() 53 | torch.cuda.empty_cache() 54 | 55 | opt = self.opt 56 | results = {} 57 | data_time, batch_time = AverageMeter(), AverageMeter() 58 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 59 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 60 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 61 | end = time.time() 62 | for iter_id, batch in enumerate(data_loader): 63 | if iter_id >= num_iters: 64 | break 65 | data_time.update(time.time() - end) 66 | 67 | for k in batch: 68 | if k != 'meta': 69 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 70 | 71 | output, loss, loss_stats = model_with_loss(batch) 72 | loss = loss.mean() 73 | if phase == 'train': 74 | self.optimizer.zero_grad() 75 | loss.backward() 76 | self.optimizer.step() 77 | batch_time.update(time.time() - end) 78 | end = time.time() 79 | 80 | Bar.suffix = '{phase}: 
[{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 81 | epoch, iter_id, num_iters, phase=phase, 82 | total=bar.elapsed_td, eta=bar.eta_td) 83 | for l in avg_loss_stats: 84 | avg_loss_stats[l].update( 85 | loss_stats[l].mean().item(), batch['input'].size(0)) 86 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 87 | if not opt.hide_data_time: 88 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 89 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 90 | if opt.print_iter > 0: 91 | if iter_id % opt.print_iter == 0: 92 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 93 | else: 94 | bar.next() 95 | 96 | if opt.test: 97 | self.save_result(output, batch, results) 98 | del output, loss, loss_stats, batch 99 | 100 | bar.finish() 101 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 102 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 103 | return ret, results 104 | 105 | 106 | def debug(self, batch, output, iter_id): 107 | raise NotImplementedError 108 | 109 | def save_result(self, output, batch, results): 110 | raise NotImplementedError 111 | 112 | def _get_losses(self, opt): 113 | raise NotImplementedError 114 | 115 | def val(self, epoch, data_loader): 116 | return self.run_epoch('val', epoch, data_loader) 117 | 118 | def train(self, epoch, data_loader): 119 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /lib/trains/mot.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import math 6 | import torch 7 | import numpy as np 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | import torchvision 11 | 12 | from fvcore.nn import sigmoid_focal_loss_jit 13 | 14 | from lib.models.losses import FocalLoss, TripletLoss 15 | from lib.models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 16 | from lib.models.decode import mot_decode 17 | from lib.models.utils import _sigmoid, _tranpose_and_gather_feat 18 | from lib.utils.post_process import ctdet_post_process 19 | from .base_trainer import BaseTrainer 20 | 21 | 22 | class MotLoss(torch.nn.Module): 23 | def __init__(self, opt): 24 | super(MotLoss, self).__init__() 25 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 26 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 27 | RegLoss() if opt.reg_loss == 'sl1' else None 28 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \ 29 | NormRegL1Loss() if opt.norm_wh else \ 30 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg 31 | self.opt = opt 32 | self.emb_dim = opt.reid_dim 33 | self.nID = opt.nID 34 | self.classifier = nn.Linear(self.emb_dim, self.nID) 35 | if opt.id_loss == 'focal': 36 | torch.nn.init.normal_(self.classifier.weight, std=0.01) 37 | prior_prob = 0.01 38 | bias_value = -math.log((1 - prior_prob) / prior_prob) 39 | torch.nn.init.constant_(self.classifier.bias, bias_value) 40 | self.IDLoss = nn.CrossEntropyLoss(ignore_index=-1) 41 | self.emb_scale = math.sqrt(2) * math.log(self.nID - 1) 42 | self.s_det = nn.Parameter(-1.85 * torch.ones(1)) 43 | self.s_id = nn.Parameter(-1.05 * torch.ones(1)) 44 | 45 | def forward(self, outputs, batch): 46 | opt = self.opt 47 | hm_loss, wh_loss, off_loss, id_loss = 0, 0, 0, 0 48 | for s in range(opt.num_stacks): 49 | output = outputs[s] 50 | if not opt.mse_loss: 51 
| output['hm'] = _sigmoid(output['hm']) 52 | 53 | hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks 54 | if opt.wh_weight > 0: 55 | wh_loss += self.crit_reg( 56 | output['wh'], batch['reg_mask'], 57 | batch['ind'], batch['wh']) / opt.num_stacks 58 | 59 | if opt.reg_offset and opt.off_weight > 0: 60 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 61 | batch['ind'], batch['reg']) / opt.num_stacks 62 | 63 | if opt.id_weight > 0: 64 | id_head = _tranpose_and_gather_feat(output['id'], batch['ind']) 65 | id_head = id_head[batch['reg_mask'] > 0].contiguous() 66 | id_head = self.emb_scale * F.normalize(id_head) 67 | id_target = batch['ids'][batch['reg_mask'] > 0] 68 | 69 | id_output = self.classifier(id_head).contiguous() 70 | if self.opt.id_loss == 'focal': 71 | id_target_one_hot = id_output.new_zeros((id_head.size(0), self.nID)).scatter_(1, 72 | id_target.long().view( 73 | -1, 1), 1) 74 | id_loss += sigmoid_focal_loss_jit(id_output, id_target_one_hot, 75 | alpha=0.25, gamma=2.0, reduction="sum" 76 | ) / id_output.size(0) 77 | else: 78 | id_loss += self.IDLoss(id_output, id_target) 79 | 80 | det_loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + opt.off_weight * off_loss 81 | if opt.multi_loss == 'uncertainty': 82 | loss = torch.exp(-self.s_det) * det_loss + torch.exp(-self.s_id) * id_loss + (self.s_det + self.s_id) 83 | loss *= 0.5 84 | else: 85 | loss = det_loss + 0.1 * id_loss 86 | 87 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 88 | 'wh_loss': wh_loss, 'off_loss': off_loss, 'id_loss': id_loss} 89 | return loss, loss_stats 90 | 91 | 92 | class MotTrainer(BaseTrainer): 93 | def __init__(self, opt, model, optimizer=None): 94 | super(MotTrainer, self).__init__(opt, model, optimizer=optimizer) 95 | 96 | def _get_losses(self, opt): 97 | loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss', 'id_loss'] 98 | loss = MotLoss(opt) 99 | return loss_states, loss 100 | 101 | def save_result(self, output, batch, results): 102 | reg = output['reg'] if self.opt.reg_offset else None 103 | dets = mot_decode( 104 | output['hm'], output['wh'], reg=reg, 105 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 106 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 107 | dets_out = ctdet_post_process( 108 | dets.copy(), batch['meta']['c'].cpu().numpy(), 109 | batch['meta']['s'].cpu().numpy(), 110 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 111 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 112 | -------------------------------------------------------------------------------- /lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .mot import MotTrainer 6 | 7 | 8 | train_factory = { 9 | 'mot': MotTrainer, 10 | } 11 | -------------------------------------------------------------------------------- /lib/utils/__pycache__/image.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/image.cpython-38.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/post_process.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/post_process.cpython-38.pyc -------------------------------------------------------------------------------- /lib/utils/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/lib/utils/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | def transform_preds(coords, center, scale, output_size): 20 | target_coords = np.zeros(coords.shape) 21 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 22 | for p in range(coords.shape[0]): 23 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 24 | return target_coords 25 | 26 | 27 | def get_affine_transform(center, 28 | scale, 29 | rot, 30 | output_size, 31 | shift=np.array([0, 0], dtype=np.float32), 32 | inv=0): 33 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 34 | scale = np.array([scale, scale], dtype=np.float32) 35 | 36 | scale_tmp = scale 37 | src_w = scale_tmp[0] 38 | dst_w = output_size[0] 39 | dst_h = output_size[1] 40 | 41 | rot_rad = np.pi * rot / 180 42 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 43 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 44 | 45 | src = np.zeros((3, 2), dtype=np.float32) 46 | dst = np.zeros((3, 2), dtype=np.float32) 47 | src[0, :] = center + scale_tmp * shift 48 | src[1, :] = center + src_dir + scale_tmp * shift 49 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 50 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 51 | 52 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 53 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 54 | 55 | if inv: 56 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 57 | else: 58 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 59 | 60 | return trans 61 | 62 | 63 | def affine_transform(pt, t): 64 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 65 | new_pt = np.dot(t, new_pt) 66 | return new_pt[:2] 67 | 68 | 69 | def get_3rd_point(a, b): 70 | direct = a - b 71 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 72 | 73 | 74 | def get_dir(src_point, rot_rad): 75 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 76 | 77 | src_result = [0, 0] 78 | src_result[0] = src_point[0] * cs - src_point[1] * sn 79 | src_result[1] = src_point[0] * sn + src_point[1] * cs 80 | 81 | return src_result 82 | 83 | 84 | def crop(img, center, scale, output_size, rot=0): 85 | trans = get_affine_transform(center, scale, rot, output_size) 86 | 87 | dst_img = cv2.warpAffine(img, 88 | trans, 89 | (int(output_size[0]), int(output_size[1])), 90 | 
flags=cv2.INTER_LINEAR) 91 | 92 | return dst_img 93 | 94 | 95 | def gaussian_radius(det_size, min_overlap=0.7): 96 | height, width = det_size 97 | 98 | a1 = 1 99 | b1 = (height + width) 100 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 101 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 102 | r1 = (b1 + sq1) / 2 103 | 104 | a2 = 4 105 | b2 = 2 * (height + width) 106 | c2 = (1 - min_overlap) * width * height 107 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 108 | r2 = (b2 + sq2) / 2 109 | 110 | a3 = 4 * min_overlap 111 | b3 = -2 * min_overlap * (height + width) 112 | c3 = (min_overlap - 1) * width * height 113 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 114 | r3 = (b3 + sq3) / 2 115 | return min(r1, r2, r3) 116 | 117 | 118 | def gaussian2D(shape, sigma=1): 119 | m, n = [(ss - 1.) / 2. for ss in shape] 120 | y, x = np.ogrid[-m:m+1,-n:n+1] 121 | 122 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 123 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 124 | return h 125 | 126 | def draw_umich_gaussian(heatmap, center, radius, k=1): 127 | diameter = 2 * radius + 1 128 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 129 | 130 | x, y = int(center[0]), int(center[1]) 131 | 132 | height, width = heatmap.shape[0:2] 133 | 134 | left, right = min(x, radius), min(width - x, radius + 1) 135 | top, bottom = min(y, radius), min(height - y, radius + 1) 136 | 137 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 138 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 139 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 140 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 141 | return heatmap 142 | 143 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 144 | diameter = 2 * radius + 1 145 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 146 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 147 | dim = value.shape[0] 148 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 149 | if is_offset and dim == 2: 150 | delta = np.arange(diameter*2+1) - radius 151 | reg[0] = reg[0] - delta.reshape(1, -1) 152 | reg[1] = reg[1] - delta.reshape(-1, 1) 153 | 154 | x, y = int(center[0]), int(center[1]) 155 | 156 | height, width = heatmap.shape[0:2] 157 | 158 | left, right = min(x, radius), min(width - x, radius + 1) 159 | top, bottom = min(y, radius), min(height - y, radius + 1) 160 | 161 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 162 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 163 | masked_gaussian = gaussian[radius - top:radius + bottom, 164 | radius - left:radius + right] 165 | masked_reg = reg[:, radius - top:radius + bottom, 166 | radius - left:radius + right] 167 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 168 | idx = (masked_gaussian >= masked_heatmap).reshape( 169 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 170 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 171 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 172 | return regmap 173 | 174 | 175 | def draw_msra_gaussian(heatmap, center, sigma): 176 | tmp_size = sigma * 3 177 | mu_x = int(center[0] + 0.5) 178 | mu_y = int(center[1] + 0.5) 179 | w, h = heatmap.shape[0], heatmap.shape[1] 180 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 181 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 182 | if ul[0] >= h or ul[1] >= w 
or br[0] < 0 or br[1] < 0: 183 | return heatmap 184 | size = 2 * tmp_size + 1 185 | x = np.arange(0, size, 1, np.float32) 186 | y = x[:, np.newaxis] 187 | x0 = y0 = size // 2 188 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 189 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 190 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 191 | img_x = max(0, ul[0]), min(br[0], h) 192 | img_y = max(0, ul[1]), min(br[1], w) 193 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 194 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 195 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 196 | return heatmap 197 | 198 | def grayscale(image): 199 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 200 | 201 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 202 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 203 | image += np.dot(eigvec, eigval * alpha) 204 | 205 | def blend_(alpha, image1, image2): 206 | image1 *= alpha 207 | image2 *= (1 - alpha) 208 | image1 += image2 209 | 210 | def saturation_(data_rng, image, gs, gs_mean, var): 211 | alpha = 1. + data_rng.uniform(low=-var, high=var) 212 | blend_(alpha, image, gs[:, :, None]) 213 | 214 | def brightness_(data_rng, image, gs, gs_mean, var): 215 | alpha = 1. + data_rng.uniform(low=-var, high=var) 216 | image *= alpha 217 | 218 | def contrast_(data_rng, image, gs, gs_mean, var): 219 | alpha = 1. + data_rng.uniform(low=-var, high=var) 220 | blend_(alpha, image, gs_mean) 221 | 222 | def color_aug(data_rng, image, eig_val, eig_vec): 223 | functions = [brightness_, contrast_, saturation_] 224 | random.shuffle(functions) 225 | 226 | gs = grayscale(image) 227 | gs_mean = gs.mean() 228 | for f in functions: 229 | f(data_rng, image, gs, gs_mean, 0.4) 230 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 231 | -------------------------------------------------------------------------------- /lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | 8 | 9 | def ctdet_post_process(dets, c, s, h, w, num_classes): 10 | # dets: batch x max_dets x dim 11 | # return 1-based class det dict 12 | ret = [] 13 | for i in range(dets.shape[0]): 14 | top_preds = {} 15 | dets[i, :, :2] = transform_preds( 16 | dets[i, :, 0:2], c[i], s[i], (w, h)) 17 | dets[i, :, 2:4] = transform_preds( 18 | dets[i, :, 2:4], c[i], s[i], (w, h)) 19 | classes = dets[i, :, -1] 20 | for j in range(num_classes): 21 | inds = (classes == j) 22 | top_preds[j + 1] = np.concatenate([ 23 | dets[i, inds, :4].astype(np.float32), 24 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 25 | ret.append(top_preds) 26 | return ret 27 | 28 | -------------------------------------------------------------------------------- /lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | class AverageMeter(object): 9 | """Computes and stores the average and current value""" 10 | def __init__(self): 11 | self.reset() 12 | 13 | def reset(self): 14 | self.val = 0 15 | self.avg = 0 16 | self.sum = 0 17 | self.count = 0 18 | 19 | def update(self, val, n=1): 20 | self.val = val 21 | self.sum += val * n 22 | self.count += n 23 | if self.count > 0: 24 | 
self.avg = self.sum / self.count 25 | 26 | 27 | def xyxy2xywh(x): 28 | # Convert bounding box format from [x1, y1, x2, y2] to [x, y, w, h] 29 | y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape) 30 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 31 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 32 | y[:, 2] = x[:, 2] - x[:, 0] 33 | y[:, 3] = x[:, 3] - x[:, 1] 34 | return y 35 | 36 | 37 | def xywh2xyxy(x): 38 | # Convert bounding box format from [x, y, w, h] to [x1, y1, x2, y2] 39 | y = torch.zeros(x.shape) if x.dtype is torch.float32 else np.zeros(x.shape) 40 | y[:, 0] = (x[:, 0] - x[:, 2] / 2) 41 | y[:, 1] = (x[:, 1] - x[:, 3] / 2) 42 | y[:, 2] = (x[:, 0] + x[:, 2] / 2) 43 | y[:, 3] = (x[:, 1] + x[:, 3] / 2) 44 | return y 45 | 46 | def ap_per_class(tp, conf, pred_cls, target_cls): 47 | """ Compute the average precision, given the recall and precision curves. 48 | Method originally from https://github.com/rafaelpadilla/Object-Detection-Metrics. 49 | # Arguments 50 | tp: True positives (list). 51 | conf: Objectness value from 0-1 (list). 52 | pred_cls: Predicted object classes (list). 53 | target_cls: True object classes (list). 54 | # Returns 55 | The average precision as computed in py-faster-rcnn. 56 | """ 57 | 58 | # lists/pytorch to numpy 59 | tp, conf, pred_cls, target_cls = np.array(tp), np.array(conf), np.array(pred_cls), np.array(target_cls) 60 | 61 | # Sort by objectness 62 | i = np.argsort(-conf) 63 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 64 | 65 | # Find unique classes 66 | unique_classes = np.unique(np.concatenate((pred_cls, target_cls), 0)) 67 | 68 | # Create Precision-Recall curve and compute AP for each class 69 | ap, p, r = [], [], [] 70 | for c in unique_classes: 71 | i = pred_cls == c 72 | n_gt = sum(target_cls == c) # Number of ground truth objects 73 | n_p = sum(i) # Number of predicted objects 74 | 75 | if (n_p == 0) and (n_gt == 0): 76 | continue 77 | elif (n_p == 0) or (n_gt == 0): 78 | ap.append(0) 79 | r.append(0) 80 | p.append(0) 81 | else: 82 | # Accumulate FPs and TPs 83 | fpc = np.cumsum(1 - tp[i]) 84 | tpc = np.cumsum(tp[i]) 85 | 86 | # Recall 87 | recall_curve = tpc / (n_gt + 1e-16) 88 | r.append(tpc[-1] / (n_gt + 1e-16)) 89 | 90 | # Precision 91 | precision_curve = tpc / (tpc + fpc) 92 | p.append(tpc[-1] / (tpc[-1] + fpc[-1])) 93 | 94 | # AP from recall-precision curve 95 | ap.append(compute_ap(recall_curve, precision_curve)) 96 | 97 | return np.array(ap), unique_classes.astype('int32'), np.array(r), np.array(p) 98 | 99 | 100 | def compute_ap(recall, precision): 101 | """ Compute the average precision, given the recall and precision curves. 102 | Code originally from https://github.com/rbgirshick/py-faster-rcnn. 103 | # Arguments 104 | recall: The recall curve (list). 105 | precision: The precision curve (list). 106 | # Returns 107 | The average precision as computed in py-faster-rcnn. 
108 | """ 109 | # correct AP calculation 110 | # first append sentinel values at the end 111 | 112 | mrec = np.concatenate(([0.], recall, [1.])) 113 | mpre = np.concatenate(([0.], precision, [0.])) 114 | 115 | # compute the precision envelope 116 | for i in range(mpre.size - 1, 0, -1): 117 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 118 | 119 | # to calculate area under PR curve, look for points 120 | # where X axis (recall) changes value 121 | i = np.where(mrec[1:] != mrec[:-1])[0] 122 | 123 | # and sum (\Delta recall) * prec 124 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 125 | return ap 126 | 127 | 128 | def bbox_iou(box1, box2, x1y1x2y2=False): 129 | """ 130 | Returns the IoU of two bounding boxes 131 | """ 132 | N, M = len(box1), len(box2) 133 | if x1y1x2y2: 134 | # Get the coordinates of bounding boxes 135 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3] 136 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3] 137 | else: 138 | # Transform from center and width to exact coordinates 139 | b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2 140 | b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2 141 | b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2 142 | b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2 143 | 144 | # get the coordinates of the intersection rectangle 145 | inter_rect_x1 = torch.max(b1_x1.unsqueeze(1), b2_x1) 146 | inter_rect_y1 = torch.max(b1_y1.unsqueeze(1), b2_y1) 147 | inter_rect_x2 = torch.min(b1_x2.unsqueeze(1), b2_x2) 148 | inter_rect_y2 = torch.min(b1_y2.unsqueeze(1), b2_y2) 149 | # Intersection area 150 | inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1, 0) * torch.clamp(inter_rect_y2 - inter_rect_y1, 0) 151 | # Union Area 152 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)) 153 | b1_area = ((b1_x2 - b1_x1) * (b1_y2 - b1_y1)).view(-1,1).expand(N,M) 154 | b2_area = ((b2_x2 - b2_x1) * (b2_y2 - b2_y1)).view(1,-1).expand(N,M) 155 | 156 | return inter_area / (b1_area + b2_area - inter_area + 1e-16) 157 | 158 | 159 | def generate_anchors(nGh, nGw, anchor_wh): 160 | nA = len(anchor_wh) 161 | yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw), indexing='ij') 162 | 163 | mesh = np.stack([xx, yy], axis=0) # Shape 2, nGh, nGw 164 | mesh = np.tile(np.expand_dims(mesh, axis=0), (nA, 1, 1, 1)) # Shape nA x 2 x nGh x nGw 165 | anchor_offset_mesh = np.tile(np.expand_dims(np.expand_dims(anchor_wh, -1), -1), (1, 1, nGh, nGw)) # Shape nA x 2 x nGh x nGw 166 | anchor_mesh = np.concatenate((mesh, anchor_offset_mesh), axis=1) # Shape nA x 4 x nGh x nGw 167 | return anchor_mesh 168 | 169 | 170 | def encode_delta(gt_box_list, fg_anchor_list): 171 | px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:,1], \ 172 | fg_anchor_list[:, 2], fg_anchor_list[:,3] 173 | gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \ 174 | gt_box_list[:, 2], gt_box_list[:, 3] 175 | dx = (gx - px) / pw 176 | dy = (gy - py) / ph 177 | dw = np.log(gw/pw) 178 | dh = np.log(gh/ph) 179 | return np.stack((dx, dy, dw, dh), axis=1) 180 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from interface import VideoQt,QtCore,QApplication,QWidget 3 | 4 | if __name__ == "__main__": 5 | QtCore.QCoreApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling) 6 | app = QApplication(sys.argv) 7 | widget = QWidget() 8 | 
widget.setStyleSheet('background-color:black;') 9 | widget.setWindowOpacity(0.8) 10 | video = VideoQt() 11 | video.setupUi(widget) 12 | widget.show() 13 | sys.exit(app.exec_()) 14 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | yacs 2 | opencv-python 3 | PyYAML 4 | cython-bbox 5 | scipy 6 | progress 7 | motmetrics 8 | matplotlib 9 | lap 10 | openpyxl 11 | Pillow 12 | tensorboardX 13 | fvcore -------------------------------------------------------------------------------- /src/VideoTimer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from PyQt5.QtCore import QObject, pyqtSignal, QThread, QMutex, QMutexLocker 3 | 4 | class Communicate(QObject): 5 | signal = pyqtSignal(str) 6 | 7 | class VideoTimer(QThread): 8 | def __init__(self, frequent=20): 9 | QThread.__init__(self) 10 | self.stopped = False 11 | self.frequent = frequent 12 | self.timeSignal = Communicate() 13 | self.mutex = QMutex() 14 | 15 | def run(self): 16 | with QMutexLocker(self.mutex): 17 | self.stopped = False 18 | while True: 19 | if self.stopped: 20 | return 21 | self.timeSignal.signal.emit("1") 22 | time.sleep(1 / self.frequent) 23 | 24 | def stop(self): 25 | with QMutexLocker(self.mutex): 26 | self.stopped = True 27 | 28 | def is_stopped(self): 29 | with QMutexLocker(self.mutex): 30 | return self.stopped 31 | 32 | def set_fps(self, fps): 33 | self.frequent = fps -------------------------------------------------------------------------------- /src/built_in_camera_track.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import logging 4 | import torch 5 | import time 6 | import numpy as np 7 | from lib.tracking_utils.timer import Timer 8 | from lib.datasets.dataset.jde import letterbox 9 | from lib.tracking_utils.utils import mkdir_if_missing 10 | from lib.tracking_utils import visualization as vis 11 | 12 | 13 | def built_in_camera_tracker(threshold_value, root_dir, use_gpu, 14 | opts, JDETracker, fourcc, QPixmap, QImage, videolabel, 15 | QMessageBox, QApplication, logger): 16 | try: 17 | cap_test = cv2.VideoCapture(0) 18 | if cap_test is None or not cap_test.isOpened(): 19 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', 'No built-in camera was detected') 20 | msg_box.exec_() 21 | else: 22 | cap_test.release() 23 | 24 | # if flag == 0: 25 | # msg_box = QMessageBox(QMessageBox.Warning, 'Notice!', 'Please make sure this device has a camera') 26 | # msg_box.exec_() 27 | 28 | # Set params 29 | logger.setLevel(logging.INFO) 30 | print(f'camera_id: {0}') 31 | 32 | model_dir = root_dir + '/models' 33 | 34 | for pth in os.listdir(model_dir): 35 | if pth.split('.')[-1] == 'pth': 36 | model_dir += ('/' + pth) 37 | break 38 | 39 | print(f'model_dir: {model_dir}') 40 | 41 | output_video_dir = root_dir + '/output_built_in_camera' 42 | 43 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 44 | opt = opts(current_dir=root_dir,model_path=model_dir, 45 | input_path=None,threshold=threshold_value, 46 | match_threshold=0.8,use_gpu=use_gpu).init() 47 | 48 | opt.output_root = output_video_dir 49 | print(f'current_use_gpus: {opt.gpus}') 50 | print(f'output_video_dir: {output_video_dir}') 51 | mkdir_if_missing(output_video_dir) 52 | 53 | # start to pre_track 54 | capture = cv2.VideoCapture(0) 55 | # frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) 56 | # print(f'frame_count: {frame_count}', frame_count) 57 | #
58 | # progressBar.setMaximum(frame_count) 59 | frame_rate = 30 60 | tracker = JDETracker(opt, frame_rate=frame_rate) 61 | video_name = time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4' 62 | print(f'video_name: {video_name}') 63 | 64 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | fourcc = fourcc 67 | writer = cv2.VideoWriter((output_video_dir + '/' + video_name), fourcc, frame_rate, (width, height)) 68 | results = [] 69 | frame_id = 0 70 | timer = Timer() 71 | use_cuda = True 72 | if use_gpu == '-1': 73 | use_cuda = False 74 | 75 | while (True): 76 | try: 77 | # run tracking 78 | ok, frame = capture.read() 79 | if not ok: 80 | break 81 | # frame = cv2.resize(frame, (1920, 1080)) 82 | img, _, _, _ = letterbox(frame, height=1088, width=608) 83 | img = img[:, :, ::-1].transpose(2, 0, 1) 84 | img = np.ascontiguousarray(img, dtype=np.float32) 85 | img /= 255.0 86 | timer.tic() 87 | 88 | if use_cuda: 89 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 90 | else: 91 | blob = torch.from_numpy(img).unsqueeze(0) 92 | 93 | online_targets = tracker.update(blob, frame) 94 | online_tlwhs = [] 95 | online_ids = [] 96 | online_scores = [] 97 | for t in online_targets: 98 | tlwh = t.tlwh 99 | tid = t.track_id 100 | vertical = tlwh[2] / tlwh[3] > 1.6 101 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 102 | online_tlwhs.append(tlwh) 103 | online_ids.append(tid) 104 | online_scores.append(t.score) 105 | timer.toc() 106 | # save results 107 | # results.append((frame_id + 1, online_tlwhs, online_ids)) 108 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 109 | fps = 1. / timer.average_time 110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id, 111 | fps=fps) 112 | frame_id += 1 113 | print(f'detect frame: {frame_id}') 114 | 115 | height, width = online_im.shape[:2] 116 | if online_im.ndim == 3: 117 | rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB) 118 | elif online_im.ndim == 2: 119 | rgb = cv2.cvtColor(online_im, cv2.COLOR_GRAY2BGR) 120 | 121 | temp_image = QImage(rgb.flatten(), width, height, QImage.Format_RGB888) 122 | temp_pixmap = QPixmap.fromImage(temp_image) 123 | writer.write(online_im) 124 | videolabel.setPixmap(temp_pixmap) 125 | QApplication.processEvents() 126 | except: 127 | writer.release() 128 | writer.release() 129 | except: 130 | pass -------------------------------------------------------------------------------- /src/external_camera_track.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import logging 4 | import torch 5 | import time 6 | import numpy as np 7 | from lib.tracking_utils.timer import Timer 8 | from lib.datasets.dataset.jde import letterbox 9 | from lib.tracking_utils.utils import mkdir_if_missing 10 | from lib.tracking_utils import visualization as vis 11 | 12 | 13 | def external_camera_tracker(threshold_value, root_dir, use_gpu, 14 | opts, JDETracker, fourcc, QPixmap, QImage, videolabel, 15 | QMessageBox, QApplication, logger): 16 | try: 17 | cap_test = cv2.VideoCapture(1) 18 | if cap_test is None or not cap_test.isOpened(): 19 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', 'The external camera does not exist') 20 | msg_box.exec_() 21 | else: 22 | cap_test.release() 23 | 24 | # if flag == 0: 25 | # msg_box = QMessageBox(QMessageBox.Warning, '提示!', '请确保当前设备有摄像头') 26 | # msg_box.exec_() 27 | 28 | # Set parameters. 
29 | logger.setLevel(logging.INFO) 30 | print(f'camera_id: {1}') 31 | 32 | model_dir = root_dir + '/models' 33 | 34 | for pth in os.listdir(model_dir): 35 | if pth.split('.')[-1] == 'pth': 36 | model_dir += ('/' + pth) 37 | break 38 | 39 | print(f'model_dir: {model_dir}') 40 | 41 | output_video_dir = root_dir + '/output_external_camera' 42 | 43 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 44 | opt = opts(current_dir=root_dir, model_path=model_dir, 45 | input_path=None, threshold=threshold_value, 46 | match_threshold=0.8, use_gpu=use_gpu).init() 47 | 48 | opt.output_root = output_video_dir 49 | print(f'current_use_gpus: {opt.gpus}') 50 | print(f'output_video_dir: {output_video_dir}') 51 | mkdir_if_missing(output_video_dir) 52 | 53 | # start to pre_track 54 | capture = cv2.VideoCapture(1) 55 | # frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) 56 | # print(f'frame_count: {frame_count}', frame_count) 57 | # 58 | # progressBar.setMaximum(frame_count) 59 | frame_rate = 30 60 | tracker = JDETracker(opt, frame_rate=frame_rate) 61 | video_name = time.strftime('%Y_%m_%d_%H_%M', time.localtime()) + '_.mp4' 62 | print(f'video_name: {video_name}') 63 | 64 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 65 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 66 | fourcc = fourcc 67 | writer = cv2.VideoWriter((output_video_dir + '/' + video_name), fourcc, frame_rate, (width, height)) 68 | results = [] 69 | frame_id = 0 70 | timer = Timer() 71 | use_cuda = True 72 | if use_gpu == '-1': 73 | use_cuda = False 74 | 75 | while (True): 76 | try: 77 | # run tracking 78 | ok, frame = capture.read() 79 | if not ok: 80 | break 81 | # frame = cv2.resize(frame, (1920, 1080)) 82 | img, _, _, _ = letterbox(frame, height=1088, width=608) 83 | img = img[:, :, ::-1].transpose(2, 0, 1) 84 | img = np.ascontiguousarray(img, dtype=np.float32) 85 | img /= 255.0 86 | timer.tic() 87 | 88 | if use_cuda: 89 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 90 | else: 91 | blob = torch.from_numpy(img).unsqueeze(0) 92 | 93 | online_targets = tracker.update(blob, frame) 94 | online_tlwhs = [] 95 | online_ids = [] 96 | online_scores = [] 97 | for t in online_targets: 98 | tlwh = t.tlwh 99 | tid = t.track_id 100 | vertical = tlwh[2] / tlwh[3] > 1.6 101 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 102 | online_tlwhs.append(tlwh) 103 | online_ids.append(tid) 104 | online_scores.append(t.score) 105 | timer.toc() 106 | # save results 107 | # results.append((frame_id + 1, online_tlwhs, online_ids)) 108 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 109 | fps = 1. 
/ timer.average_time 110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id, 111 | fps=fps) 112 | frame_id += 1 113 | print(f'detect frame: {frame_id}') 114 | 115 | height, width = online_im.shape[:2] 116 | if online_im.ndim == 3: 117 | rgb = cv2.cvtColor(online_im, cv2.COLOR_BGR2RGB) 118 | elif online_im.ndim == 2: 119 | rgb = cv2.cvtColor(online_im, cv2.COLOR_GRAY2BGR) 120 | 121 | temp_image = QImage(rgb.flatten(), width, height, QImage.Format_RGB888) 122 | temp_pixmap = QPixmap.fromImage(temp_image) 123 | writer.write(online_im) 124 | videolabel.setPixmap(temp_pixmap) 125 | QApplication.processEvents() 126 | except: 127 | writer.release() 128 | writer.release() 129 | except: 130 | pass -------------------------------------------------------------------------------- /src/tracker/__pycache__/_init_paths.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ReverseSacle/CrowdTracker-Pytorch_Basic/aee67f542c177ded7204a2d86f2d0929a8a76af0/src/tracker/__pycache__/_init_paths.cpython-38.pyc -------------------------------------------------------------------------------- /src/tracker/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tracker/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | 7 | import logging 8 | import os 9 | import os.path as osp 10 | from lib.opts import opts 11 | from lib.tracking_utils.utils import mkdir_if_missing 12 | from lib.tracking_utils.log import logger 13 | import lib.datasets.dataset.jde as datasets 14 | from track import eval_seq 15 | 16 | 17 | logger.setLevel(logging.INFO) 18 | 19 | 20 | def demo(opt): 21 | result_root = opt.output_root if opt.output_root != '' else '.' 
22 | mkdir_if_missing(result_root) 23 | 24 | logger.info('Starting tracking...') 25 | dataloader = datasets.LoadVideo(opt.input_video, opt.img_size) 26 | result_filename = os.path.join(result_root, 'results.txt') 27 | frame_rate = dataloader.frame_rate 28 | 29 | frame_dir = None if opt.output_format == 'text' else osp.join(result_root, 'frame') 30 | eval_seq(opt, dataloader, 'mot', result_filename, 31 | save_dir=frame_dir, show_image=False, frame_rate=frame_rate, 32 | use_cuda=opt.gpus!=[-1]) 33 | 34 | if opt.output_format == 'video': 35 | output_video_path = osp.join(result_root, 'MOT16-03-results.mp4') 36 | cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -b 5000k -c:v mpeg4 {}'.format(osp.join(result_root, 'frame'), output_video_path) 37 | os.system(cmd_str) 38 | 39 | 40 | if __name__ == '__main__': 41 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 42 | opt = opts().init() 43 | demo(opt) 44 | -------------------------------------------------------------------------------- /src/tracker/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | import os.path as osp 8 | import cv2 9 | import logging 10 | import argparse 11 | import motmetrics as mm 12 | import numpy as np 13 | 14 | from lib.tracker.fusetracker import FuseTracker 15 | from lib.tracking_utils import visualization as vis 16 | from lib.tracking_utils.log import logger 17 | from lib.tracking_utils.timer import Timer 18 | from lib.tracking_utils.evaluation import Evaluator 19 | import lib.datasets.dataset.jde as datasets 20 | import torch 21 | from lib.tracking_utils.utils import mkdir_if_missing, tlbr2tlwh 22 | from lib.opts import opts 23 | from lib.models.decode import mot_decode 24 | from lib.utils.post_process import ctdet_post_process 25 | from lib.models.model import create_model, load_model 26 | 27 | 28 | def write_results_score(filename, results): 29 | save_format = '{frame},{x1},{y1},{w},{h},{s}\n' 30 | with open(filename, 'w') as f: 31 | for frame_id, tlwhs, scores in results: 32 | for tlwh, score in zip(tlwhs, scores): 33 | x1, y1, w, h = tlwh 34 | line = save_format.format(frame=frame_id, x1=x1, y1=y1, w=w, h=h, s=score) 35 | f.write(line) 36 | print('save results to {}'.format(filename)) 37 | 38 | 39 | def post_process(opt, dets, meta): 40 | dets = dets.detach().cpu().numpy() 41 | dets = dets.reshape(1, -1, dets.shape[2]) 42 | dets = ctdet_post_process( 43 | dets.copy(), [meta['c']], [meta['s']], 44 | meta['out_height'], meta['out_width'], opt.num_classes) 45 | for j in range(1, opt.num_classes + 1): 46 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 47 | return dets[0] 48 | 49 | 50 | def merge_outputs(opt, detections): 51 | results = {} 52 | for j in range(1, opt.num_classes + 1): 53 | results[j] = np.concatenate( 54 | [detection[j] for detection in detections], axis=0).astype(np.float32) 55 | 56 | scores = np.hstack( 57 | [results[j][:, 4] for j in range(1, opt.num_classes + 1)]) 58 | if len(scores) > 128: 59 | kth = len(scores) - 128 60 | thresh = np.partition(scores, kth)[kth] 61 | for j in range(1, opt.num_classes + 1): 62 | keep_inds = (results[j][:, 4] >= thresh) 63 | results[j] = results[j][keep_inds] 64 | return results 65 | 66 | 67 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30): 68 | if save_dir: 69 | mkdir_if_missing(save_dir) 70 | if 
opt.gpus[0] >= 0: 71 | opt.device = torch.device('cuda') 72 | else: 73 | opt.device = torch.device('cpu') 74 | print('Creating model...') 75 | model = create_model(opt.arch, opt.heads, opt.head_conv) 76 | model = load_model(model, opt.load_model) 77 | # model = torch.nn.DataParallel(model) 78 | model = model.to(opt.device) 79 | model.eval() 80 | timer = Timer() 81 | results = [] 82 | frame_id = 0 83 | for path, img, img0 in dataloader: 84 | if frame_id % 20 == 0: 85 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) 86 | # run detecting 87 | timer.tic() 88 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 89 | width = img0.shape[1] 90 | height = img0.shape[0] 91 | inp_height = blob.shape[2] 92 | inp_width = blob.shape[3] 93 | c = np.array([width / 2., height / 2.], dtype=np.float32) 94 | s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 95 | meta = {'c': c, 's': s, 96 | 'out_height': inp_height // opt.down_ratio, 97 | 'out_width': inp_width // opt.down_ratio} 98 | with torch.no_grad(): 99 | output = model(blob)[-1] 100 | hm = output['hm'].sigmoid_() 101 | wh = output['wh'] 102 | reg = output['reg'] if opt.reg_offset else None 103 | dets, inds = mot_decode(hm, wh, reg=reg, ltrb=opt.ltrb, K=opt.K) 104 | 105 | dets = post_process(opt, dets, meta) 106 | dets = merge_outputs(opt, [dets])[1] 107 | 108 | dets = dets[dets[:, 4] > 0.1] 109 | dets[:, :4] = tlbr2tlwh(dets[:, :4]) 110 | 111 | tlwhs = [] 112 | scores = [] 113 | for *tlwh, conf in dets: 114 | tlwhs.append(tlwh) 115 | scores.append(conf) 116 | timer.toc() 117 | # save results 118 | results.append((frame_id + 1, tlwhs, scores)) 119 | frame_id += 1 120 | # save results 121 | write_results_score(result_filename, results) 122 | #write_results_score_hie(result_filename, results, data_type) 123 | return frame_id, timer.average_time, timer.calls 124 | 125 | 126 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo', 127 | save_images=False, save_videos=False, show_image=True): 128 | logger.setLevel(logging.INFO) 129 | result_root = os.path.join(data_root, '..', 'dets', exp_name) 130 | mkdir_if_missing(result_root) 131 | data_type = 'mot' 132 | 133 | # run tracking 134 | accs = [] 135 | n_frame = 0 136 | timer_avgs, timer_calls = [], [] 137 | for seq in seqs: 138 | output_dir = os.path.join(data_root, '..', 'outputs', exp_name, seq) if save_images or save_videos else None 139 | logger.info('start seq: {}'.format(seq)) 140 | dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size) 141 | result_filename = os.path.join(result_root, '{}.txt'.format(seq)) 142 | 143 | nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename, 144 | save_dir=output_dir, show_image=show_image) 145 | n_frame += nf 146 | timer_avgs.append(ta) 147 | timer_calls.append(tc) 148 | 149 | timer_avgs = np.asarray(timer_avgs) 150 | timer_calls = np.asarray(timer_calls) 151 | all_time = np.dot(timer_avgs, timer_calls) 152 | avg_time = all_time / np.sum(timer_calls) 153 | logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time)) 154 | 155 | 156 | if __name__ == '__main__': 157 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 158 | opt = opts().init() 159 | if opt.val_hie: 160 | seqs_str = '''1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19''' 161 | #seqs_str = '''9''' 162 | #seqs_str = '''11 12 13 14 15 16 17 18 19''' 163 | data_root = '/data/yfzhang/MOT/JDE/HIE/HIE20/images/train' 164 | elif opt.test_hie: 165 | 
seqs_str = '''20 21 22 23 24 25 26 27 28 29 30 31 32''' 166 | seqs_str = '''25''' 167 | data_root = '/data/yfzhang/MOT/JDE/HIE/HIE20/images/test' 168 | elif opt.val_mot17: 169 | seqs_str = '''MOT17-02-SDP 170 | MOT17-04-SDP 171 | MOT17-05-SDP 172 | MOT17-09-SDP 173 | MOT17-10-SDP 174 | MOT17-11-SDP 175 | MOT17-13-SDP''' 176 | #seqs_str = '''MOT17-02-SDP''' 177 | data_root = os.path.join(opt.data_dir, 'MOT17/images/train') 178 | else: 179 | seqs_str = None 180 | data_root = None 181 | seqs = [seq.strip() for seq in seqs_str.split()] 182 | 183 | main(opt, 184 | data_root=data_root, 185 | seqs=seqs, 186 | exp_name='fairmot_mot17', 187 | show_image=False, 188 | save_images=False, 189 | save_videos=False) 190 | -------------------------------------------------------------------------------- /src/tracker/to_track.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import os 9 | import logging 10 | import time 11 | 12 | from lib.opts import opts 13 | from lib.tracking_utils.utils import mkdir_if_missing 14 | from lib.tracking_utils.log import logger 15 | from lib.tracker.multitracker import JDETracker 16 | from lib.tracking_utils.timer import Timer 17 | from lib.tracking_utils import visualization as vis 18 | from lib.datasets.dataset.jde import letterbox 19 | 20 | logger.setLevel(logging.INFO) 21 | 22 | # set parameters. 23 | # 设置参数 24 | current_dir = os.path.dirname(os.path.realpath(__file__)).\ 25 | replace('\\','/').replace('/src/tracker','') 26 | input_path = current_dir + '/videos/MOT16-03.mp4' 27 | input_file_name = (input_path.split('/')[-1]).split('.')[0] 28 | output_path = current_dir + '/output_video/' 29 | model_dir = current_dir + '/models' 30 | threshold = 0.4 31 | match_threshold = 0.8 32 | 33 | # Choose the GPU that you want to do this with(CPU: -1,GPU_1: 0,GPU_2: 1). 
34 | # 追踪用的GPU(CPU: -1,GPU_1: 0,GPU_2: 1) 35 | set_use_gpu = '-1' 36 | print(f'input_path: {input_path}') 37 | 38 | for pth in os.listdir(model_dir): 39 | if pth.split('.')[-1] == 'pth': 40 | model_dir += ('/' + pth) 41 | break 42 | 43 | print(f'model_path: {model_dir}') 44 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 45 | opt = opts(current_dir=current_dir, model_path=model_dir, 46 | input_path=input_path, threshold=threshold, 47 | match_threshold=match_threshold, use_gpu=set_use_gpu).init() 48 | opt.output_root = output_path 49 | print(f'current_use_gpus: {opt.gpus}') 50 | if opt.output_root: 51 | mkdir_if_missing(opt.output_root) 52 | # frame_dir = None if opt.output_format == 'text' else osp.join(result_root, 'frame') 53 | 54 | #start to pre_track 55 | capture = cv2.VideoCapture(input_path) 56 | frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) 57 | print(f'frame_count: {frame_count}') 58 | frame_rate = 30 59 | tracker = JDETracker(opt, frame_rate=frame_rate) 60 | video_name = input_file_name + '_' + time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4' 61 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 62 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 63 | fps = frame_rate 64 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 65 | writer = cv2.VideoWriter(opt.output_root + video_name, fourcc, fps, (width, height)) 66 | results = [] 67 | frame_id = 0 68 | timer = Timer() 69 | use_cuda = True 70 | if set_use_gpu == '-1': 71 | use_cuda = False 72 | 73 | while(True): 74 | # run tracking 75 | ok,frame = capture.read() 76 | if not ok: 77 | break 78 | frame = cv2.resize(frame, (1920, 1080)) 79 | img, _, _, _ = letterbox(frame, height=1088, width=608) 80 | img = img[:, :, ::-1].transpose(2, 0, 1) 81 | img = np.ascontiguousarray(img, dtype=np.float32) 82 | img /= 255.0 83 | timer.tic() 84 | if use_cuda: 85 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 86 | else: 87 | blob = torch.from_numpy(img).unsqueeze(0) 88 | online_targets = tracker.update(blob, frame) 89 | online_tlwhs = [] 90 | online_ids = [] 91 | online_scores = [] 92 | for t in online_targets: 93 | tlwh = t.tlwh 94 | tid = t.track_id 95 | vertical = tlwh[2] / tlwh[3] > 1.6 96 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 97 | online_tlwhs.append(tlwh) 98 | online_ids.append(tid) 99 | online_scores.append(t.score) 100 | timer.toc() 101 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 102 | fps = 1. 
/ timer.average_time 103 | # save results 104 | #results.append((frame_id + 1, online_tlwhs, online_ids)) 105 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 106 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, frame_id=frame_id, 107 | fps=fps) 108 | frame_id += 1 109 | print(f'detect frame: {frame_id}') 110 | im = np.array(online_im) 111 | writer.write(im) 112 | writer.release() 113 | capture.release() 114 | # save results 115 | # write_results(result_filename, results, data_type) 116 | # write_results_score(result_filename, results, data_type) 117 | -------------------------------------------------------------------------------- /src/tracker/to_track_camera.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import torch 7 | import numpy as np 8 | import os 9 | import logging 10 | import time 11 | 12 | from lib.opts import opts 13 | from lib.tracking_utils.utils import mkdir_if_missing 14 | from lib.tracking_utils.log import logger 15 | from lib.tracker.multitracker import JDETracker 16 | from lib.tracking_utils.timer import Timer 17 | from lib.tracking_utils import visualization as vis 18 | from lib.datasets.dataset.jde import letterbox 19 | 20 | logger.setLevel(logging.INFO) 21 | # set parameters. 22 | # 设置参数 23 | current_dir = os.path.dirname(os.path.realpath(__file__))\ 24 | .replace('\\','/').replace('/src/tracker','') 25 | input_path = None 26 | input_file_name = None 27 | model_dir = current_dir + '/models' 28 | output_path = current_dir + '/output_camera' 29 | threshold = 0.4 30 | match_threshold = 0.8 31 | camera_id = -1 32 | if input_path == None: 33 | camera_id = 0 34 | 35 | # Choose the GPU that you want to do this with(CPU: -1,GPU_1: 0,GPU_2: 1). 
36 | # 追踪用的GPU(CPU: -1,GPU_1: 0,GPU_2: 1) 37 | set_use_gpu = '-1' 38 | print(f'camera_id: {camera_id}') 39 | 40 | for pth in os.listdir(model_dir): 41 | if pth.split('.')[-1] == 'pth': 42 | model_dir += ('/' + pth) 43 | break 44 | 45 | print(f'model_path: {model_dir}') 46 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 47 | opt = opts(current_dir=current_dir, model_path=model_dir, 48 | input_path=input_path, threshold=threshold, 49 | match_threshold=match_threshold, use_gpu=set_use_gpu).init() 50 | opt.output_root = output_path 51 | print(f'current_use_gpus: {opt.gpus}') 52 | print(f'output_path: {opt.output_root}') 53 | mkdir_if_missing(opt.output_root) 54 | # frame_dir = None if qq_format == 'text' else osp.join(result_root, 'frame') 55 | 56 | #start to pre_track 57 | capture = cv2.VideoCapture(camera_id) 58 | frame_rate = 30 59 | tracker = JDETracker(opt, frame_rate=frame_rate) 60 | 61 | #set current time to be the video-file name(设置当前时间为摄像头保存文件名) 62 | video_name = time.strftime('%Y_%m_%d_%H_%M',time.localtime()) + '_.mp4' 63 | print(f'video_name: {video_name}') 64 | 65 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 66 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 67 | fps = frame_rate 68 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 69 | writer = cv2.VideoWriter((opt.output_root + '/' + video_name), fourcc, fps, (width, height)) 70 | results = [] 71 | frame_id = 0 72 | timer = Timer() 73 | use_cuda = True 74 | if set_use_gpu == '-1': 75 | use_cuda = False 76 | 77 | while(True): 78 | # run tracking 79 | ok,frame = capture.read() 80 | if not ok: 81 | break 82 | #frame = cv2.resize(frame, (1920, 1080)) 83 | img, _, _, _ = letterbox(frame, height=1088, width=608) 84 | img = img[:, :, ::-1].transpose(2, 0, 1) 85 | img = np.ascontiguousarray(img, dtype=np.float32) 86 | img /= 255.0 87 | timer.tic() 88 | if use_cuda: 89 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 90 | else: 91 | blob = torch.from_numpy(img).unsqueeze(0) 92 | online_targets = tracker.update(blob, frame) 93 | online_tlwhs = [] 94 | online_ids = [] 95 | online_scores = [] 96 | for t in online_targets: 97 | tlwh = t.tlwh 98 | tid = t.track_id 99 | vertical = tlwh[2] / tlwh[3] > 1.6 100 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 101 | online_tlwhs.append(tlwh) 102 | online_ids.append(tid) 103 | online_scores.append(t.score) 104 | timer.toc() 105 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 106 | fps = 1. 
/ timer.average_time 107 | # save results 108 | #results.append((frame_id + 1, online_tlwhs, online_ids)) 109 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 110 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, 111 | frame_id=frame_id, fps=fps) 112 | frame_id += 1 113 | print(f'detect frame: {frame_id}') 114 | im = np.array(online_im) 115 | writer.write(online_im) 116 | cv2.imshow('test',online_im) 117 | if cv2.waitKey(1) & 0xFF == ord('q'): 118 | break 119 | capture.release() 120 | writer.release() 121 | cv2.destroyAllWindows() 122 | -------------------------------------------------------------------------------- /src/tracker/track.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import _init_paths 6 | import os 7 | import os.path as osp 8 | import cv2 9 | import logging 10 | import argparse 11 | import motmetrics as mm 12 | import numpy as np 13 | import torch 14 | 15 | from lib.tracker.multitracker import JDETracker 16 | from lib.tracking_utils import visualization as vis 17 | from lib.tracking_utils.log import logger 18 | from lib.tracking_utils.timer import Timer 19 | from lib.tracking_utils.evaluation import Evaluator 20 | import lib.datasets.dataset.jde as datasets 21 | 22 | from lib.tracking_utils.utils import mkdir_if_missing 23 | from lib.opts import opts 24 | 25 | 26 | def write_results(filename, results, data_type): 27 | if data_type == 'mot': 28 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 29 | elif data_type == 'kitti': 30 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 31 | else: 32 | raise ValueError(data_type) 33 | 34 | with open(filename, 'w') as f: 35 | for frame_id, tlwhs, track_ids in results: 36 | if data_type == 'kitti': 37 | frame_id -= 1 38 | for tlwh, track_id in zip(tlwhs, track_ids): 39 | if track_id < 0: 40 | continue 41 | x1, y1, w, h = tlwh 42 | x2, y2 = x1 + w, y1 + h 43 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) 44 | f.write(line) 45 | logger.info('save results to {}'.format(filename)) 46 | 47 | 48 | def write_results_score(filename, results, data_type): 49 | if data_type == 'mot': 50 | save_format = '{frame},{id},{x1},{y1},{w},{h},{s},1,-1,-1,-1\n' 51 | elif data_type == 'kitti': 52 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 53 | else: 54 | raise ValueError(data_type) 55 | 56 | with open(filename, 'w') as f: 57 | for frame_id, tlwhs, track_ids, scores in results: 58 | if data_type == 'kitti': 59 | frame_id -= 1 60 | for tlwh, track_id, score in zip(tlwhs, track_ids, scores): 61 | if track_id < 0: 62 | continue 63 | x1, y1, w, h = tlwh 64 | x2, y2 = x1 + w, y1 + h 65 | line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, s=score) 66 | f.write(line) 67 | logger.info('save results to {}'.format(filename)) 68 | 69 | 70 | def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30, use_cuda=True): 71 | if save_dir: 72 | mkdir_if_missing(save_dir) 73 | tracker = JDETracker(opt, frame_rate=frame_rate) 74 | timer = Timer() 75 | results = [] 76 | frame_id = 0 77 | #for path, img, img0 in dataloader: 78 | for i, (path, img, img0) in enumerate(dataloader): 79 | #if i % 8 != 0: 80 | #continue 81 
| if frame_id % 20 == 0: 82 | logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) 83 | 84 | # run tracking 85 | timer.tic() 86 | if use_cuda: 87 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 88 | else: 89 | blob = torch.from_numpy(img).unsqueeze(0) 90 | online_targets = tracker.update(blob, img0) 91 | online_tlwhs = [] 92 | online_ids = [] 93 | online_scores = [] 94 | for t in online_targets: 95 | tlwh = t.tlwh 96 | tid = t.track_id 97 | vertical = tlwh[2] / tlwh[3] > 1.6 98 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 99 | online_tlwhs.append(tlwh) 100 | online_ids.append(tid) 101 | online_scores.append(t.score) 102 | timer.toc() 103 | # save results 104 | results.append((frame_id + 1, online_tlwhs, online_ids)) 105 | #results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 106 | if show_image or save_dir is not None: 107 | online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id, 108 | fps=1. / timer.average_time) 109 | if show_image: 110 | cv2.imshow('online_im', online_im) 111 | if save_dir is not None: 112 | cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im) 113 | frame_id += 1 114 | # save results 115 | #write_results(result_filename, results, data_type) 116 | #write_results_score(result_filename, results, data_type) 117 | return frame_id, timer.average_time, timer.calls 118 | 119 | 120 | def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo', 121 | save_images=False, save_videos=False, show_image=True): 122 | logger.setLevel(logging.INFO) 123 | result_root = os.path.join(data_root, '..', 'results', exp_name) 124 | mkdir_if_missing(result_root) 125 | data_type = 'mot' 126 | 127 | # run tracking 128 | accs = [] 129 | n_frame = 0 130 | timer_avgs, timer_calls = [], [] 131 | for seq in seqs: 132 | output_dir = os.path.join(data_root, '..', 'outputs', exp_name, seq) if save_images or save_videos else None 133 | logger.info('start seq: {}'.format(seq)) 134 | dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size) 135 | result_filename = os.path.join(result_root, '{}.txt'.format(seq)) 136 | meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read() 137 | frame_rate = int(meta_info[meta_info.find('frameRate') + 10:meta_info.find('\nseqLength')]) 138 | nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename, 139 | save_dir=output_dir, show_image=show_image, frame_rate=frame_rate) 140 | n_frame += nf 141 | timer_avgs.append(ta) 142 | timer_calls.append(tc) 143 | 144 | # eval 145 | logger.info('Evaluate seq: {}'.format(seq)) 146 | evaluator = Evaluator(data_root, seq, data_type) 147 | accs.append(evaluator.eval_file(result_filename)) 148 | if save_videos: 149 | output_video_path = osp.join(output_dir, '{}.mp4'.format(seq)) 150 | cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -c:v copy {}'.format(output_dir, output_video_path) 151 | os.system(cmd_str) 152 | timer_avgs = np.asarray(timer_avgs) 153 | timer_calls = np.asarray(timer_calls) 154 | all_time = np.dot(timer_avgs, timer_calls) 155 | avg_time = all_time / np.sum(timer_calls) 156 | logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time)) 157 | 158 | # get summary 159 | metrics = mm.metrics.motchallenge_metrics 160 | mh = mm.metrics.create() 161 | summary = Evaluator.get_summary(accs, seqs, metrics) 162 | strsummary = mm.io.render_summary( 163 | summary, 164 | formatters=mh.formatters, 165 | 
namemap=mm.io.motchallenge_metric_names 166 | ) 167 | print(strsummary) 168 | Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name))) 169 | -------------------------------------------------------------------------------- /src/video_track.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import logging 4 | import torch 5 | import time 6 | import numpy as np 7 | from lib.tracking_utils.timer import Timer 8 | from lib.datasets.dataset.jde import letterbox 9 | from lib.tracking_utils.utils import mkdir_if_missing 10 | from lib.tracking_utils import visualization as vis 11 | 12 | def video_tracker(threshold_value,root_dir,input_video_dir,use_gpus, 13 | opts,JDETracker,fourcc, 14 | QMessageBox,progressBar,QApplication,logger): 15 | 16 | try: 17 | threshold = threshold_value 18 | 19 | if os.path.exists(input_video_dir): 20 | file_name = (input_video_dir.split('.')[0]).split('/')[-1] + '_' + time.strftime('%Y_%m_%d_%H_%M',time.localtime()) 21 | output_video_dir = root_dir + '/output_video' 22 | print(f'output video dir: {output_video_dir}') 23 | else: 24 | msg_box = QMessageBox(QMessageBox.Warning, 'Warning!', "The video file does not exist") 25 | msg_box.exec_() 26 | 27 | progressBar.setValue(0) 28 | logger.setLevel(logging.INFO) 29 | model_dir = root_dir + '/models' 30 | 31 | for pth in os.listdir(model_dir): 32 | if pth.split('.')[-1] == 'pth': 33 | model_dir += ('/' + pth) 34 | break 35 | print(f'model_dir: {model_dir}') 36 | 37 | # os.environ['CUDA_VISIBLE_DEVICES'] = '0' 38 | opt = opts(current_dir=root_dir,model_path=model_dir, 39 | input_path=input_video_dir,threshold=threshold, 40 | match_threshold=0.8,use_gpu=use_gpus).init() 41 | 42 | opt.output_root = output_video_dir 43 | print(f'current_use_gpus: {opt.gpus}') 44 | 45 | if opt.output_root: 46 | mkdir_if_missing(opt.output_root) 47 | 48 | # start to pre_track 49 | capture = cv2.VideoCapture(input_video_dir) 50 | frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) 51 | print(f'frame_count: {frame_count}') 52 | 53 | progressBar.setMaximum(frame_count) 54 | 55 | # start to run track 56 | frame_rate = 30 57 | tracker = JDETracker(opt, frame_rate=frame_rate) 58 | width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) 59 | height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) 60 | fourcc = fourcc 61 | writer = cv2.VideoWriter(output_video_dir + '/' + file_name + '.mp4', 62 | fourcc, frame_rate, (width, height)) 63 | results = [] 64 | frame_id = 0 65 | timer = Timer() 66 | use_cuda = True 67 | 68 | if '-1' == use_gpus: 69 | use_cuda = False 70 | 71 | step = 0 72 | while (True): 73 | # run tracking 74 | step += 1 75 | ok, frame = capture.read() 76 | if not ok: 77 | break 78 | frame = cv2.resize(frame, (1920, 1080)) 79 | img, _, _, _ = letterbox(frame, height=1088, width=608) 80 | img = img[:, :, ::-1].transpose(2, 0, 1) 81 | img = np.ascontiguousarray(img, dtype=np.float32) 82 | img /= 255.0 83 | timer.tic() 84 | 85 | if use_cuda: 86 | blob = torch.from_numpy(img).cuda().unsqueeze(0) 87 | else: 88 | blob = torch.from_numpy(img).unsqueeze(0) 89 | 90 | online_targets = tracker.update(blob, frame) 91 | online_tlwhs = [] 92 | online_ids = [] 93 | online_scores = [] 94 | for t in online_targets: 95 | tlwh = t.tlwh 96 | tid = t.track_id 97 | vertical = tlwh[2] / tlwh[3] > 1.6 98 | if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: 99 | online_tlwhs.append(tlwh) 100 | online_ids.append(tid) 101 | online_scores.append(t.score) 102 | 
timer.toc() 103 | # save results 104 | # results.append((frame_id + 1, online_tlwhs, online_ids)) 105 | results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) 106 | fps = 1. / timer.average_time 107 | online_im = vis.plot_tracking(frame, online_tlwhs, online_ids, 108 | frame_id=frame_id,fps=fps) 109 | frame_id += 1 110 | print(f'detect frame: {frame_id}') 111 | 112 | writer.write(online_im) 113 | progressBar.setValue(step) 114 | QApplication.processEvents() 115 | 116 | msg_box = QMessageBox(QMessageBox.Warning, '提示!', "视频预测完成") 117 | msg_box.exec_() 118 | writer.release() 119 | capture.release() 120 | 121 | return 1 122 | except: 123 | return 0 --------------------------------------------------------------------------------