├── .idea ├── CenterNetPerson.iml └── modules.xml ├── README.md ├── conda_packagelist.txt ├── config.py ├── config ├── CenterNet-52-multi_scale.json └── CenterNet-52.json ├── data └── demo │ ├── 000001.jpg │ └── 000003.jpg ├── db ├── __init__.py ├── base.py ├── cityperson.py ├── datasets.py └── detection.py ├── demo.py ├── external ├── .gitignore ├── Makefile ├── __init__.py ├── nms.pyx ├── nms.so └── setup.py ├── models ├── CenterNet-104.py ├── CenterNet-52.py ├── __init__.py └── py_utils │ ├── __init__.py │ ├── _cpools │ ├── .gitignore │ ├── __init__.py │ ├── setup.py │ └── src │ │ ├── bottom_pool.cpp │ │ ├── left_pool.cpp │ │ ├── right_pool.cpp │ │ └── top_pool.cpp │ ├── data_parallel.py │ ├── kp.py │ ├── kp_utils.py │ ├── scatter_gather.py │ └── utils.py ├── nnet ├── __init__.py └── py_factory.py ├── sample ├── __init__.py ├── pedestrian.py └── utils.py ├── test.py ├── test ├── __init__.py ├── base.py ├── centernet.py ├── detector.py ├── pedestrian.py └── vis_utils.py ├── train.py └── utils ├── __init__.py ├── image.py └── tqdm.py /.idea/CenterNetPerson.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Pedestrian detection based on CenterNet 3 | 4 | In this repo, we re-train [CenterNet](https://github.com/Duankaiwen/CenterNet) on the CityPerson dataset to obtain a pedestrian detector. 5 | 6 | 7 | 8 | ## Preparation 9 | 10 | Please first install [Anaconda](https://anaconda.org) and create an Anaconda environment using the provided package list. 11 | ``` 12 | conda create --name CenterNet --file conda_packagelist.txt 13 | ``` 14 | 15 | After you create the environment, activate it. 16 | ``` 17 | source activate CenterNet 18 | ``` 19 | 20 | ## Compiling Corner Pooling Layers 21 | ``` 22 | cd <CenterNet dir>/models/py_utils/_cpools/ 23 | python setup.py install --user 24 | ``` 25 | 26 | ## Compiling NMS 27 | ``` 28 | cd <CenterNet dir>/external 29 | make 30 | ``` 31 | 32 | ## CityPerson dataset 33 | 34 | - Download the CityPerson image and label files: [images](https://www.cityscapes-dataset.com/file-handling/?packageID=3), [labels](https://www.cityscapes-dataset.com/file-handling/?packageID=28) 35 | - Create a symbolic link in `data` to your CityPerson data 36 | ``` 37 | ln -s /path/to/your/CityPerson data/ 38 | ``` 39 | 40 | ## Training and Evaluation 41 | To train CenterNet-52: 42 | ``` 43 | python train.py --cfg_file CenterNet-52 44 | ``` 45 | The default configuration in `config/CenterNet-52.json` assumes 2 GPUs (12 GB each) with a total batch size of 12; adjust these settings to match your hardware.
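For example, to train on a single 12 GB GPU you could halve the batch and keep a single per-GPU chunk. This is an illustrative edit of `config/CenterNet-52.json`, not a setting shipped with the repo; `chunk_sizes` lists how each batch is split across the available GPUs, so its entries should sum to `batch_size` (the two-GPU default uses `[6, 6]` for a batch of 12, and the multi-scale COCO config uses `[6, 6, 6, 6]` for a batch of 24).
```
{
    "system": {
        "batch_size": 6,
        "chunk_sizes": [6]
    }
}
```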
46 | 47 | To evaluate your detector: 48 | ``` 49 | python test.py --cfg_file CenterNet-52 --testiter #checkpoint_epoch 50 | ``` 51 | 52 | ## Demo 53 | The demo images are stored in `data/demo`. 54 | ``` 55 | python demo.py 56 | ``` -------------------------------------------------------------------------------- /conda_packagelist.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name <env> --file <this file> 3 | # platform: linux-64 4 | @EXPLICIT 5 | https://repo.continuum.io/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 6 | https://repo.continuum.io/pkgs/main/linux-64/bzip2-1.0.6-h9a117a8_4.tar.bz2 7 | https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2018.4.16-0.tar.bz2 8 | https://conda.anaconda.org/caffe2/linux-64/caffe2-cuda8.0-cudnn7-0.8.dev-py36_2018.05.14.tar.bz2 9 | https://repo.continuum.io/pkgs/main/linux-64/cairo-1.14.12-h7636065_2.tar.bz2 10 | https://repo.continuum.io/pkgs/main/linux-64/certifi-2018.4.16-py36_0.tar.bz2 11 | https://repo.continuum.io/pkgs/main/linux-64/cffi-1.11.5-py36h9745a5d_0.tar.bz2 12 | https://repo.continuum.io/pkgs/free/linux-64/cudatoolkit-8.0-3.tar.bz2 13 | https://repo.continuum.io/pkgs/main/linux-64/cycler-0.10.0-py36h93f1223_0.tar.bz2 14 | https://repo.continuum.io/pkgs/main/linux-64/dbus-1.13.2-h714fa37_1.tar.bz2 15 | https://repo.continuum.io/pkgs/main/linux-64/expat-2.2.5-he0dffb1_0.tar.bz2 16 | https://repo.continuum.io/pkgs/main/linux-64/ffmpeg-3.4-h7264315_0.tar.bz2 17 | https://repo.continuum.io/pkgs/main/linux-64/fontconfig-2.12.6-h49f89f6_0.tar.bz2 18 | https://repo.continuum.io/pkgs/free/linux-64/freeglut-2.8.1-0.tar.bz2 19 | https://repo.continuum.io/pkgs/main/linux-64/freetype-2.8-hab7d2ae_1.tar.bz2 20 | https://repo.continuum.io/pkgs/free/linux-64/future-0.16.0-py36_1.tar.bz2 21 | https://repo.continuum.io/pkgs/main/linux-64/gflags-2.2.1-hf484d3e_0.tar.bz2 22 | https://repo.continuum.io/pkgs/main/linux-64/glib-2.56.1-h000015b_0.tar.bz2 23 | https://repo.continuum.io/pkgs/main/linux-64/glog-0.3.5-hf484d3e_1.tar.bz2 24 | https://repo.continuum.io/pkgs/main/linux-64/graphite2-1.3.11-hf63cedd_1.tar.bz2 25 | https://repo.continuum.io/pkgs/main/linux-64/gst-plugins-base-1.14.0-hbbd80ab_1.tar.bz2 26 | https://repo.continuum.io/pkgs/main/linux-64/gstreamer-1.14.0-hb453b48_1.tar.bz2 27 | https://repo.continuum.io/pkgs/main/linux-64/h5py-2.8.0-py36hca9c191_0.tar.bz2 28 | https://repo.continuum.io/pkgs/main/linux-64/harfbuzz-1.7.6-h5f0a787_1.tar.bz2 29 | https://repo.continuum.io/pkgs/main/linux-64/hdf5-1.8.18-h6792536_1.tar.bz2 30 | https://repo.continuum.io/pkgs/main/linux-64/icu-58.2-h9c2bf20_1.tar.bz2 31 | https://repo.continuum.io/pkgs/main/linux-64/intel-openmp-2018.0.0-8.tar.bz2 32 | https://repo.continuum.io/pkgs/main/linux-64/jasper-2.0.14-h07fcdf6_0.tar.bz2 33 | https://repo.continuum.io/pkgs/main/linux-64/cython-0.26.1-py36h21c49d0_0.tar.bz2 34 | https://repo.continuum.io/pkgs/main/linux-64/jpeg-9b-h024ee3a_2.tar.bz2 35 | https://repo.continuum.io/pkgs/main/linux-64/kiwisolver-1.0.1-py36h764f252_0.tar.bz2 36 | https://repo.continuum.io/pkgs/main/linux-64/libedit-3.1-heed3624_0.tar.bz2 37 | https://repo.continuum.io/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 38 | https://repo.continuum.io/pkgs/main/linux-64/libgcc-ng-7.2.0-hdf63c60_3.tar.bz2 39 | https://repo.continuum.io/pkgs/main/linux-64/libgfortran-ng-7.2.0-hdf63c60_3.tar.bz2 40 | 
https://repo.continuum.io/pkgs/main/linux-64/libglu-9.0.0-h0c0bdc1_1.tar.bz2 41 | https://repo.continuum.io/pkgs/main/linux-64/libopus-1.2.1-hb9ed12e_0.tar.bz2 42 | https://repo.continuum.io/pkgs/main/linux-64/libpng-1.6.34-hb9fc6fc_0.tar.bz2 43 | https://repo.continuum.io/pkgs/main/linux-64/libprotobuf-3.5.2-h6f1eeef_0.tar.bz2 44 | https://repo.continuum.io/pkgs/main/linux-64/libstdcxx-ng-7.2.0-hdf63c60_3.tar.bz2 45 | https://repo.continuum.io/pkgs/main/linux-64/libtiff-4.0.9-h28f6b97_0.tar.bz2 46 | https://repo.continuum.io/pkgs/main/linux-64/libvpx-1.6.1-h888fd40_0.tar.bz2 47 | https://repo.continuum.io/pkgs/main/linux-64/libxcb-1.13-h1bed415_1.tar.bz2 48 | https://repo.continuum.io/pkgs/main/linux-64/libxml2-2.9.8-hf84eae3_0.tar.bz2 49 | https://repo.continuum.io/pkgs/main/linux-64/matplotlib-2.2.2-py36h0e671d2_1.tar.bz2 50 | https://repo.continuum.io/pkgs/main/linux-64/mkl-2018.0.2-1.tar.bz2 51 | https://repo.continuum.io/pkgs/main/linux-64/mkl_fft-1.0.1-py36h3010b51_0.tar.bz2 52 | https://repo.continuum.io/pkgs/main/linux-64/mkl_random-1.0.1-py36h629b387_0.tar.bz2 53 | https://repo.continuum.io/pkgs/main/linux-64/ncurses-6.0-h9df7e31_2.tar.bz2 54 | https://repo.continuum.io/pkgs/main/linux-64/ninja-1.8.2-py36h6bb024c_1.tar.bz2 55 | https://repo.continuum.io/pkgs/main/linux-64/numpy-1.14.3-py36hcd700cb_1.tar.bz2 56 | https://repo.continuum.io/pkgs/main/linux-64/numpy-base-1.14.3-py36h9be14a7_1.tar.bz2 57 | https://repo.continuum.io/pkgs/main/linux-64/olefile-0.45.1-py36_0.tar.bz2 58 | https://repo.continuum.io/pkgs/main/linux-64/opencv-3.3.1-py36h9248ab4_2.tar.bz2 59 | https://repo.continuum.io/pkgs/main/linux-64/openssl-1.0.2o-h20670df_0.tar.bz2 60 | https://repo.continuum.io/pkgs/main/linux-64/pcre-8.42-h439df22_0.tar.bz2 61 | https://repo.continuum.io/pkgs/main/linux-64/pillow-5.1.0-py36h3deb7b8_0.tar.bz2 62 | https://repo.continuum.io/pkgs/main/linux-64/pip-10.0.1-py36_0.tar.bz2 63 | https://repo.continuum.io/pkgs/main/linux-64/pixman-0.34.0-hceecf20_3.tar.bz2 64 | https://conda.anaconda.org/conda-forge/linux-64/protobuf-3.5.2-py36_0.tar.bz2 65 | https://repo.continuum.io/pkgs/main/linux-64/pycparser-2.18-py36hf9f622e_1.tar.bz2 66 | https://repo.continuum.io/pkgs/main/linux-64/pyparsing-2.2.0-py36hee85983_1.tar.bz2 67 | https://repo.continuum.io/pkgs/main/linux-64/pyqt-5.9.2-py36h751905a_0.tar.bz2 68 | https://repo.continuum.io/pkgs/main/linux-64/python-3.6.4-hc3d631a_3.tar.bz2 69 | https://repo.continuum.io/pkgs/main/linux-64/python-dateutil-2.7.2-py36_0.tar.bz2 70 | https://conda.anaconda.org/pytorch/linux-64/pytorch-0.4.1-py36_cuda8.0.61_cudnn7.1.2_1.tar.bz2 71 | https://repo.continuum.io/pkgs/main/linux-64/pytz-2018.4-py36_0.tar.bz2 72 | https://repo.continuum.io/pkgs/main/linux-64/pyyaml-3.12-py36hafb9ca4_1.tar.bz2 73 | https://repo.continuum.io/pkgs/main/linux-64/qt-5.9.5-h7e424d6_0.tar.bz2 74 | https://repo.continuum.io/pkgs/main/linux-64/readline-7.0-ha6073c6_4.tar.bz2 75 | https://repo.continuum.io/pkgs/main/linux-64/scikit-learn-0.19.1-py36h7aa7ec6_0.tar.bz2 76 | https://repo.continuum.io/pkgs/main/linux-64/scipy-1.1.0-py36hfc37229_0.tar.bz2 77 | https://repo.continuum.io/pkgs/main/linux-64/setuptools-39.1.0-py36_0.tar.bz2 78 | https://repo.continuum.io/pkgs/main/linux-64/sip-4.19.8-py36hf484d3e_0.tar.bz2 79 | https://repo.continuum.io/pkgs/main/linux-64/six-1.11.0-py36h372c433_1.tar.bz2 80 | https://repo.continuum.io/pkgs/main/linux-64/sqlite-3.23.1-he433501_0.tar.bz2 81 | https://repo.continuum.io/pkgs/main/linux-64/tk-8.6.7-hc745277_3.tar.bz2 82 | 
https://conda.anaconda.org/pytorch/linux-64/torchvision-0.2.1-py36_1.tar.bz2 83 | https://repo.continuum.io/pkgs/main/linux-64/tornado-5.0.2-py36_0.tar.bz2 84 | https://repo.continuum.io/pkgs/main/linux-64/tqdm-4.23.0-py36_0.tar.bz2 85 | https://repo.continuum.io/pkgs/main/linux-64/wheel-0.31.0-py36_0.tar.bz2 86 | https://repo.continuum.io/pkgs/main/linux-64/xz-5.2.3-h5e939de_4.tar.bz2 87 | https://repo.continuum.io/pkgs/main/linux-64/yaml-0.1.7-had09818_2.tar.bz2 88 | https://repo.continuum.io/pkgs/main/linux-64/zlib-1.2.11-ha838bed_2.tar.bz2 89 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class Config: 5 | def __init__(self): 6 | self._configs = {} 7 | self._configs["dataset"] = None 8 | self._configs["sampling_function"] = "kp_detection" 9 | 10 | # Training Config 11 | self._configs["display"] = 50 12 | self._configs["snapshot"] = 5000 13 | self._configs["stepsize"] = 450000 14 | self._configs["learning_rate"] = 0.00025 15 | self._configs["decay_rate"] = 10 16 | self._configs["max_iter"] = 500000 17 | self._configs["val_iter"] = 100 18 | self._configs["batch_size"] = 1 19 | self._configs["snapshot_name"] = None 20 | self._configs["prefetch_size"] = 100 21 | self._configs["weight_decay"] = False 22 | self._configs["weight_decay_rate"] = 1e-5 23 | self._configs["weight_decay_type"] = "l2" 24 | self._configs["pretrain"] = None 25 | self._configs["opt_algo"] = "adam" 26 | self._configs["chunk_sizes"] = None 27 | 28 | # Directories 29 | self._configs["data_dir"] = "/data/zwzhou/Data/MOT17/MOT17Labels" 30 | self._configs["cache_dir"] = "cache" 31 | self._configs["config_dir"] = "config" 32 | self._configs["result_dir"] = "results" 33 | 34 | # Split 35 | self._configs["train_split"] = "trainval" 36 | self._configs["val_split"] = "minival" 37 | self._configs["test_split"] = "testdev" 38 | 39 | # Rng 40 | self._configs["data_rng"] = np.random.RandomState(123) 41 | self._configs["nnet_rng"] = np.random.RandomState(317) 42 | 43 | 44 | @property 45 | def chunk_sizes(self): 46 | return self._configs["chunk_sizes"] 47 | 48 | @property 49 | def train_split(self): 50 | return self._configs["train_split"] 51 | 52 | @property 53 | def val_split(self): 54 | return self._configs["val_split"] 55 | 56 | @property 57 | def test_split(self): 58 | return self._configs["test_split"] 59 | 60 | @property 61 | def full(self): 62 | return self._configs 63 | 64 | @property 65 | def sampling_function(self): 66 | return self._configs["sampling_function"] 67 | 68 | @property 69 | def data_rng(self): 70 | return self._configs["data_rng"] 71 | 72 | @property 73 | def nnet_rng(self): 74 | return self._configs["nnet_rng"] 75 | 76 | @property 77 | def opt_algo(self): 78 | return self._configs["opt_algo"] 79 | 80 | @property 81 | def weight_decay_type(self): 82 | return self._configs["weight_decay_type"] 83 | 84 | @property 85 | def prefetch_size(self): 86 | return self._configs["prefetch_size"] 87 | 88 | @property 89 | def pretrain(self): 90 | return self._configs["pretrain"] 91 | 92 | @property 93 | def weight_decay_rate(self): 94 | return self._configs["weight_decay_rate"] 95 | 96 | @property 97 | def weight_decay(self): 98 | return self._configs["weight_decay"] 99 | 100 | @property 101 | def result_dir(self): 102 | result_dir = os.path.join(self._configs["result_dir"], self.snapshot_name) 103 | if not os.path.exists(result_dir): 104 | 
os.makedirs(result_dir) 105 | return result_dir 106 | 107 | @property 108 | def dataset(self): 109 | return self._configs["dataset"] 110 | 111 | @property 112 | def snapshot_name(self): 113 | return self._configs["snapshot_name"] 114 | 115 | @property 116 | def snapshot_dir(self): 117 | snapshot_dir = os.path.join(self.cache_dir, "nnet", self.snapshot_name) 118 | 119 | if not os.path.exists(snapshot_dir): 120 | os.makedirs(snapshot_dir) 121 | 122 | return snapshot_dir 123 | 124 | @property 125 | def snapshot_file(self): 126 | snapshot_file = os.path.join(self.snapshot_dir, self.snapshot_name + "_{}.pkl") 127 | return snapshot_file 128 | 129 | @property 130 | def config_dir(self): 131 | return self._configs["config_dir"] 132 | 133 | @property 134 | def batch_size(self): 135 | return self._configs["batch_size"] 136 | 137 | @property 138 | def max_iter(self): 139 | return self._configs["max_iter"] 140 | 141 | @property 142 | def learning_rate(self): 143 | return self._configs["learning_rate"] 144 | 145 | @property 146 | def decay_rate(self): 147 | return self._configs["decay_rate"] 148 | 149 | @property 150 | def stepsize(self): 151 | return self._configs["stepsize"] 152 | 153 | @property 154 | def snapshot(self): 155 | return self._configs["snapshot"] 156 | 157 | @property 158 | def display(self): 159 | return self._configs["display"] 160 | 161 | @property 162 | def val_iter(self): 163 | return self._configs["val_iter"] 164 | 165 | @property 166 | def data_dir(self): 167 | return self._configs["data_dir"] 168 | 169 | @property 170 | def cache_dir(self): 171 | if not os.path.exists(self._configs["cache_dir"]): 172 | os.makedirs(self._configs["cache_dir"]) 173 | return self._configs["cache_dir"] 174 | 175 | def update_config(self, new): 176 | for key in new: 177 | if key in self._configs: 178 | self._configs[key] = new[key] 179 | 180 | system_configs = Config() 181 | -------------------------------------------------------------------------------- /config/CenterNet-52-multi_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "MSCOCO", 4 | "batch_size": 24, 5 | "sampling_function": "kp_detection", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 500, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 6, 17 | 18 | "max_iter": 480000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [6,6,6,6], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [0.6, 1, 1.2], 43 | 44 | "top_k": 70, 45 | "categories": 80, 46 | "kp_categories": 1, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "merge_bbox": true, 51 | "weight_exp": 10, 52 | 53 | "max_per_image": 100 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /config/CenterNet-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "CityPerson", 4 | "batch_size": 12, 5 | "sampling_function": "kp_detection", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | 
"decay_rate": 10, 12 | 13 | "val_iter": 500, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 20, 17 | 18 | "max_iter": 400000, 19 | "stepsize": 350000, 20 | "snapshot": 10000, 21 | 22 | "chunk_sizes": [6, 6], 23 | 24 | "data_dir": "/data/zwzhou/Data/CityPerson" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [1], 43 | 44 | "top_k": 70, 45 | "categories": 1, 46 | "kp_categories": 1, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "max_per_image": 100, 51 | "pretrained_model": "cache/nnet/CenterNet-52/CenterNet-52_480000.pkl" 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /data/demo/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000001.jpg -------------------------------------------------------------------------------- /data/demo/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000003.jpg -------------------------------------------------------------------------------- /db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/db/__init__.py -------------------------------------------------------------------------------- /db/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | 5 | from config import system_configs 6 | 7 | class BASE(object): 8 | def __init__(self): 9 | self._split = None 10 | self._db_inds = [] 11 | self._image_ids = [] 12 | 13 | self._data = None 14 | self._image_hdf5 = None 15 | self._image_file = None 16 | self._image_hdf5_file = None 17 | 18 | self._mean = np.zeros((3, ), dtype=np.float32) 19 | self._std = np.ones((3, ), dtype=np.float32) 20 | self._eig_val = np.ones((3, ), dtype=np.float32) 21 | self._eig_vec = np.zeros((3, 3), dtype=np.float32) 22 | 23 | self._configs = {} 24 | self._train_cfg = {} 25 | self._model = {} 26 | self._configs["data_aug"] = True 27 | 28 | self._data_rng = None 29 | 30 | @property 31 | def data(self): 32 | if self._data is None: 33 | raise ValueError("data is not set") 34 | return self._data 35 | 36 | @property 37 | def configs(self): 38 | return self._configs 39 | 40 | @property 41 | def train_cfg(self): 42 | return self._train_cfg 43 | 44 | @property 45 | def model(self): 46 | return self._model 47 | 48 | @property 49 | def mean(self): 50 | return self._mean 51 | 52 | @property 53 | def std(self): 54 | return self._std 55 | 56 | @property 57 | def eig_val(self): 58 | return self._eig_val 59 | 60 | @property 61 | def eig_vec(self): 62 | return self._eig_vec 63 | 64 | @property 65 | def db_inds(self): 66 | return self._db_inds 67 | 68 | @property 69 | def split(self): 70 | return self._split 71 | 72 | def update_config(self, new): 73 | for key in new: 74 | if key in self._configs: 75 | self._configs[key] = new[key] 76 | 77 | def 
image_ids(self, ind): 78 | return self._image_ids[ind] 79 | 80 | def image_file(self, ind): 81 | if self._image_file is None: 82 | raise ValueError("Image path is not initialized") 83 | 84 | image_id = self._image_ids[ind] 85 | return self._image_file.format(image_id) 86 | 87 | def write_result(self, ind, all_bboxes, all_scores): 88 | pass 89 | 90 | def evaluate(self, name): 91 | pass 92 | 93 | def shuffle_inds(self, quiet=False): 94 | if self._data_rng is None: 95 | self._data_rng = np.random.RandomState(os.getpid()) 96 | 97 | if not quiet: 98 | print("shuffling indices...") 99 | rand_perm = self._data_rng.permutation(len(self._db_inds)) 100 | self._db_inds = self._db_inds[rand_perm] 101 | -------------------------------------------------------------------------------- /db/cityperson.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | from tqdm import tqdm 6 | from config import system_configs 7 | from db.detection import DETECTION 8 | 9 | 10 | class CityPerson(DETECTION): 11 | def __init__(self, db_config, split): 12 | super(CityPerson, self).__init__(db_config) 13 | 14 | data_dir = system_configs.data_dir 15 | result_dir = system_configs.result_dir 16 | cache_dir = system_configs.cache_dir 17 | 18 | self._split = split 19 | self._dataset = { 20 | "trainval": "train", 21 | "minival": "val" 22 | }[self._split] 23 | 24 | self._image_dir = os.path.join(data_dir, "leftImg8bit") 25 | 26 | self._image_file = os.path.join(self._image_dir, "{}") 27 | 28 | self._anno_dir = os.path.join(data_dir, "gtBboxCityPersons") 29 | 30 | self._data = "pedestrian" # the sample function file 31 | self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32) 32 | self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32) 33 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32) 34 | self._eig_vec = np.array([ 35 | [-0.58752847, -0.69563484, 0.41340352], 36 | [-0.5832747, 0.00994535, -0.81221408], 37 | [-0.56089297, 0.71832671, 0.41158938] 38 | ], dtype=np.float32) 39 | 40 | self._cache_file = os.path.join(cache_dir, "cityperson_{}.pkl".format(self._split)) 41 | self._load_data() 42 | self._db_inds = np.arange(len(self._image_ids)) 43 | 44 | 45 | def _load_data(self): 46 | print("loading from cache file: {}".format(self._cache_file)) 47 | if not os.path.exists(self._cache_file): 48 | print("No cache file found...") 49 | self._extract_data() 50 | with open(self._cache_file, "wb") as f: 51 | pickle.dump([self._detections, self._image_ids], f) 52 | else: 53 | with open(self._cache_file, "rb") as f: 54 | self._detections, self._image_ids = pickle.load(f) 55 | 56 | def _extract_data(self): 57 | self._image_ids = [] 58 | self._detections = {} 59 | subsets = os.listdir(os.path.join(self._anno_dir, self._dataset)) #["frankfurt", "lindau", "munster"] 60 | for ss in subsets: 61 | anno_dir = '{}/{}'.format(self._dataset, ss) 62 | for anno in os.listdir(os.path.join(self._anno_dir, anno_dir)): 63 | anno_file = os.path.join(self._anno_dir, '{}/{}'.format(anno_dir, anno)) 64 | img_id = os.path.join(anno_dir, anno.replace("gtBboxCityPersons.json", "leftImg8bit.png")) 65 | self._image_ids.append(img_id) 66 | bboxes = [] 67 | with open(anno_file, 'r') as f: 68 | anno_info = json.load(f) 69 | objs = anno_info["objects"] 70 | for obj in objs: 71 | if obj['label'] == 'pedestrian': 72 | bbox = obj['bbox'] 73 | bboxVis = obj['bboxVis'] 74 | if bboxVis[2]*bboxVis[3] * 
1.0 / (bbox[2] * bbox[3]) > 0.4: 75 | bbox = np.array(bbox) 76 | bbox[2:] += bbox[:2] 77 | bboxes.append(bbox.tolist()) 78 | bboxes = np.array(bboxes, dtype=float) 79 | if bboxes.size == 0: 80 | self._detections[img_id] = np.zeros((0, 5)) 81 | else: 82 | self._detections[img_id] = np.hstack((bboxes, np.ones((len(bboxes), 1)))) 83 | 84 | def detections(self, ind): 85 | image_id = self._image_ids[ind] 86 | detections = self._detections[image_id] 87 | return detections.astype(float).copy() 88 | 89 | def _to_float(self, x): 90 | return float("{:.2f}".format(x)) 91 | 92 | def convert_to_dict(self, all_boxes): 93 | scores, bboxes, img_ids, clses = [], [], [], [] 94 | for img_id in all_boxes: 95 | for cls_id in all_boxes[img_id]: 96 | dets = all_boxes[img_id][cls_id] 97 | img_ids.extend([img_id] * len(dets)) 98 | clses.extend([cls_id] * len(dets)) 99 | scores.append(dets[:, -1]) 100 | bboxes.append(dets[:, :-1]) 101 | scores = np.concatenate(scores, axis=0) 102 | bboxes = np.concatenate(bboxes, axis=0) 103 | detections = {"image_ids": img_ids, 104 | "category_ids": clses, 105 | "bboxes": bboxes, 106 | "confidences": scores} 107 | return detections 108 | 109 | 110 | 111 | def evaluate(self, detections, ovthresh=0.5): 112 | image_ids = detections['image_ids'] 113 | bboxes = detections['bboxes'] 114 | confidences = detections["confidences"] 115 | category_ids = detections["category_ids"] # only one class in our results 116 | 117 | # precision and recall 118 | sorted_ind = np.argsort(-confidences) 119 | bboxes = bboxes[sorted_ind, :] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | nd = len(sorted_ind) 122 | tp, fp = np.zeros(nd), np.zeros(nd) 123 | 124 | nps = 0 125 | R_dets = {} 126 | for id in image_ids: 127 | if id not in R_dets: 128 | R_dets[id] = np.zeros(len(self._detections[id])) 129 | nps += len(self._detections[id]) 130 | 131 | for d in range(nd): 132 | R = self._detections[image_ids[d]] 133 | R_det = R_dets[image_ids[d]] 134 | bb = bboxes[d, :].astype(float) 135 | ovrmax = -np.inf 136 | BBGT = R[:, :4].astype(float) 137 | 138 | if BBGT.size > 0: 139 | xmin = np.maximum(BBGT[:, 0], bb[0]) 140 | xmax = np.minimum(BBGT[:, 2], bb[2]) 141 | ymin = np.maximum(BBGT[:, 1], bb[1]) 142 | ymax = np.minimum(BBGT[:, 3], bb[3]) 143 | w = np.maximum(xmax - xmin + 1, 0.) 144 | h = np.maximum(ymax - ymin + 1, 0.) 145 | inters = w * h # intersection 146 | unions = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 147 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - 148 | BBGT[:, 1] + 1.) - inters 149 | 150 | ious = inters / unions 151 | ovrmax = np.max(ious) 152 | jmax = np.argmax(ious) 153 | if ovrmax > ovthresh: 154 | if R_det[jmax] == 0: 155 | tp[d] = 1 156 | R_det[jmax] = 1 157 | else: 158 | fp[d] = 1 159 | else: 160 | fp[d] = 1 161 | fp = np.cumsum(fp) 162 | tp = np.cumsum(tp) 163 | rec = tp/float(nps) 164 | pre = tp/np.maximum(tp + fp, np.finfo(np.float64).eps) 165 | 166 | def voc_ap(rec, pre, use_07_metric=False): 167 | """Compute VOC AP given precision and recall. 168 | If use_07_metric is true, uses the VOC 07 11-point method (default: False)""" 169 | if use_07_metric: 170 | ap = 0. 171 | for t in np.arange(0., 1.1, 0.1): 172 | if np.sum(rec >= t) == 0: 173 | p = 0 174 | else: 175 | p = np.max(pre[rec >= t]) 176 | ap = ap + p / 11.
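# The 11-point branch above averages the best precision found at the
# recall thresholds 0.0, 0.1, ..., 1.0 (the VOC 2007 approximation).
# The branch below instead computes the exact area under the
# precision-recall curve: it makes the precision envelope
# monotonically non-increasing, then sums it over every recall change.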
177 | else: 178 | # first append sentinel values at the end 179 | mrec = np.concatenate(([0.], rec, [1.])) 180 | mpre = np.concatenate(([0.], pre, [0.])) 181 | # compute the precision, 182 | for i in range(mpre.size - 1, 0, -1): 183 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 184 | i = np.where(mrec[1:] != mrec[:-1])[0] 185 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 186 | return ap 187 | print("The final evaluated AP: {}".format(voc_ap(rec, pre))) 188 | 189 | 190 | if __name__=='__main__': 191 | import cv2 192 | os.chdir('../') 193 | 194 | cfg_file = os.path.join(system_configs.config_dir, 'CenterNet-52.json') 195 | with open(cfg_file, 'r') as f: 196 | configs = json.load(f) 197 | 198 | configs['system']['snapshot_name'] = 'CenterNet-52' 199 | system_configs.update_config(configs['system']) 200 | 201 | val_split = system_configs.val_split 202 | val_db = CityPerson(configs['db'], val_split) 203 | 204 | ind = 1 205 | img_file = val_db.image_file(ind) 206 | detections = val_db.detections(ind) 207 | img = cv2.imread(img_file) 208 | 209 | for d in detections: 210 | cv2.rectangle(img, (int(d[0]), int(d[1])), (int(d[2]), int(d[3])), color=(0, 0, 255)) 211 | 212 | 213 | cv2.imshow('test', img) 214 | cv2.waitKey(0) -------------------------------------------------------------------------------- /db/datasets.py: -------------------------------------------------------------------------------- 1 | from db.cityperson import CityPerson 2 | 3 | datasets = { 4 | "CityPerson": CityPerson 5 | # "MCOCO": MCOCO 6 | } -------------------------------------------------------------------------------- /db/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from db.base import BASE 3 | 4 | 5 | class DETECTION(BASE): 6 | def __init__(self, db_config): 7 | super(DETECTION, self).__init__() 8 | 9 | self._configs["categories"] = 2 10 | self._configs["kp_categories"] = 1 11 | self._configs["rand_scales"] = [1] 12 | self._configs["rand_scale_min"] = 0.8 13 | self._configs["rand_scale_max"] = 1.4 14 | self._configs["rand_scale_step"] = 0.2 15 | 16 | self._configs["input_size"] = [511] 17 | self._configs["output_sizes"] = [[128, 128]] 18 | 19 | self._configs["nms_threshold"] = 0.5 20 | self._configs["max_per_image"] = 100 21 | self._configs["top_k"] = 100 22 | self._configs["ae_threshold"] = 0.5 23 | self._configs["nms_kernel"] = 3 24 | 25 | self._configs["nms_algorithm"] = "exp_soft_nms" 26 | self._configs["weight_exp"] = 8 27 | self._configs["merge_bbox"] = False 28 | 29 | self._configs["data_aug"] = True 30 | self._configs["lighting"] = True 31 | 32 | self._configs["border"] = 128 33 | self._configs["gaussian_bump"] = True 34 | self._configs["gaussian_iou"] = 0.7 35 | self._configs["gaussian_radius"] = -1 36 | self._configs["rand_crop"] = False 37 | self._configs["rand_color"] = False 38 | self._configs["rand_pushes"] = False 39 | self._configs["rand_samples"] = False 40 | self._configs["special_crop"] = False 41 | 42 | self._configs["test_scales"] = [1] 43 | 44 | self._train_cfg["rcnn"] = dict( 45 | assigner=dict( 46 | pos_iou_thr=0.5, 47 | neg_iou_thr=0.5, 48 | min_pos_iou=0.5, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | num=512, 52 | pos_fraction=0.25, 53 | neg_pos_ub=-1, 54 | add_gt_as_proposals=True, 55 | pos_balance_sampling=False, 56 | neg_balance_thr=0), 57 | mask_size=28, 58 | pos_weight=-1, 59 | debug=False) 60 | 61 | self._model['bbox_roi_extractor'] = dict( 62 | type='SingleRoIExtractor', 63 | 
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 64 | out_channels=256, 65 | featmap_strides=[4]) 66 | 67 | self._model['bbox_head'] = dict( 68 | type='SharedFCBBoxHead', 69 | num_fcs=2, 70 | in_channels=256, 71 | fc_out_channels=1024, 72 | roi_feat_size=7, 73 | num_classes=81, 74 | target_means=[0., 0., 0., 0.], 75 | target_stds=[0.1, 0.1, 0.2, 0.2], 76 | reg_class_agnostic=False) 77 | 78 | self.update_config(db_config) 79 | 80 | if self._configs["rand_scales"] is None: 81 | self._configs["rand_scales"] = np.arange( 82 | self._configs["rand_scale_min"], 83 | self._configs["rand_scale_max"], 84 | self._configs["rand_scale_step"] 85 | ) 86 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import cv2 4 | import time 5 | from tqdm import tqdm 6 | from test.vis_utils import draw_bboxes 7 | from test.detector import CenterNet as Detector 8 | 9 | os.environ['CUDA_VISIBLE_DEVICES']='2' 10 | detector = Detector("CenterNet-52", iter=10000) 11 | t0 = time.time() 12 | image_names = [img for img in os.listdir('data/demo') if img[-3:]=='jpg'] 13 | for i in tqdm(range(len(image_names))): 14 | image = cv2.imread('data/demo/{}'.format(image_names[i])) 15 | bboxes = detector(image) 16 | image = draw_bboxes(image, bboxes) 17 | cv2.imwrite("tmp_squeeze/{}.jpg".format(str(i).zfill(6)), image) 18 | cv2.imshow('image', image) 19 | cv2.waitKey(10) 20 | 21 | t1 = time.time() 22 | print("speed: %f s"%((t1-t0)/len(image_names)))  # average seconds per image 23 | -------------------------------------------------------------------------------- /external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/__init__.py -------------------------------------------------------------------------------- /external/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # ---------------------------------------------------------- 9 | # Soft-NMS: Improving Object Detection With One Line of Code 10 | # Copyright (c) University of Maryland, College Park 11 | # Licensed under The MIT License [see LICENSE for details] 12 | # Written by Navaneeth Bodla and Bharat Singh 13 | # ---------------------------------------------------------- 14 | 15 | import numpy as np 16 | cimport numpy as np 17 | 18 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 19 | return a if a >= b else b 20 | 21 | cdef inline np.float32_t 
min(np.float32_t a, np.float32_t b): 22 | return a if a <= b else b 23 | 24 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 25 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 26 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 27 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 28 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 29 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 30 | 31 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 32 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 33 | 34 | cdef int ndets = dets.shape[0] 35 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 36 | np.zeros((ndets), dtype=np.int) 37 | 38 | # nominal indices 39 | cdef int _i, _j 40 | # sorted indices 41 | cdef int i, j 42 | # temp variables for box i's (the box currently under consideration) 43 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 44 | # variables for computing overlap with box j (lower scoring box) 45 | cdef np.float32_t xx1, yy1, xx2, yy2 46 | cdef np.float32_t w, h 47 | cdef np.float32_t inter, ovr 48 | 49 | keep = [] 50 | for _i in range(ndets): 51 | i = order[_i] 52 | if suppressed[i] == 1: 53 | continue 54 | keep.append(i) 55 | ix1 = x1[i] 56 | iy1 = y1[i] 57 | ix2 = x2[i] 58 | iy2 = y2[i] 59 | iarea = areas[i] 60 | for _j in range(_i + 1, ndets): 61 | j = order[_j] 62 | if suppressed[j] == 1: 63 | continue 64 | xx1 = max(ix1, x1[j]) 65 | yy1 = max(iy1, y1[j]) 66 | xx2 = min(ix2, x2[j]) 67 | yy2 = min(iy2, y2[j]) 68 | w = max(0.0, xx2 - xx1 + 1) 69 | h = max(0.0, yy2 - yy1 + 1) 70 | inter = w * h 71 | ovr = inter / (iarea + areas[j] - inter) 72 | if ovr >= thresh: 73 | suppressed[j] = 1 74 | 75 | return keep 76 | 77 | def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 78 | cdef unsigned int N = boxes.shape[0] 79 | cdef float iw, ih, box_area 80 | cdef float ua 81 | cdef int pos = 0 82 | cdef float maxscore = 0 83 | cdef int maxpos = 0 84 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 85 | 86 | for i in range(N): 87 | maxscore = boxes[i, 4] 88 | maxpos = i 89 | 90 | tx1 = boxes[i,0] 91 | ty1 = boxes[i,1] 92 | tx2 = boxes[i,2] 93 | ty2 = boxes[i,3] 94 | ts = boxes[i,4] 95 | 96 | pos = i + 1 97 | # get max box 98 | while pos < N: 99 | if maxscore < boxes[pos, 4]: 100 | maxscore = boxes[pos, 4] 101 | maxpos = pos 102 | pos = pos + 1 103 | 104 | # add max box as a detection 105 | boxes[i,0] = boxes[maxpos,0] 106 | boxes[i,1] = boxes[maxpos,1] 107 | boxes[i,2] = boxes[maxpos,2] 108 | boxes[i,3] = boxes[maxpos,3] 109 | boxes[i,4] = boxes[maxpos,4] 110 | 111 | # swap ith box with position of max box 112 | boxes[maxpos,0] = tx1 113 | boxes[maxpos,1] = ty1 114 | boxes[maxpos,2] = tx2 115 | boxes[maxpos,3] = ty2 116 | boxes[maxpos,4] = ts 117 | 118 | tx1 = boxes[i,0] 119 | ty1 = boxes[i,1] 120 | tx2 = boxes[i,2] 121 | ty2 = boxes[i,3] 122 | ts = boxes[i,4] 123 | 124 | pos = i + 1 125 | # NMS iterations, note that N changes if detection boxes fall below threshold 126 | while pos < N: 127 | x1 = boxes[pos, 0] 128 | y1 = boxes[pos, 1] 129 | x2 = boxes[pos, 2] 130 | y2 = boxes[pos, 3] 131 | s = boxes[pos, 4] 132 | 133 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 134 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 135 | if iw > 0: 136 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 137 | if ih > 0: 138 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 139 | ov = iw * ih / ua #iou between max box and detection box 140 | 141 
| if method == 1: # linear 142 | if ov > Nt: 143 | weight = 1 - ov 144 | else: 145 | weight = 1 146 | elif method == 2: # gaussian 147 | weight = np.exp(-(ov * ov)/sigma) 148 | else: # original NMS 149 | if ov > Nt: 150 | weight = 0 151 | else: 152 | weight = 1 153 | 154 | boxes[pos, 4] = weight*boxes[pos, 4] 155 | 156 | # if box score falls below threshold, discard the box by swapping with last box 157 | # update N 158 | if boxes[pos, 4] < threshold: 159 | boxes[pos,0] = boxes[N-1, 0] 160 | boxes[pos,1] = boxes[N-1, 1] 161 | boxes[pos,2] = boxes[N-1, 2] 162 | boxes[pos,3] = boxes[N-1, 3] 163 | boxes[pos,4] = boxes[N-1, 4] 164 | N = N - 1 165 | pos = pos - 1 166 | 167 | pos = pos + 1 168 | 169 | keep = [i for i in range(N)] 170 | return keep 171 | 172 | def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6): 173 | cdef unsigned int N = boxes.shape[0] 174 | cdef float iw, ih, box_area 175 | cdef float ua 176 | cdef int pos = 0 177 | cdef float maxscore = 0 178 | cdef int maxpos = 0 179 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 180 | cdef float mx1,mx2,my1,my2,mts,mbs,mw 181 | 182 | for i in range(N): 183 | maxscore = boxes[i, 4] 184 | maxpos = i 185 | 186 | tx1 = boxes[i,0] 187 | ty1 = boxes[i,1] 188 | tx2 = boxes[i,2] 189 | ty2 = boxes[i,3] 190 | ts = boxes[i,4] 191 | 192 | pos = i + 1 193 | # get max box 194 | while pos < N: 195 | if maxscore < boxes[pos, 4]: 196 | maxscore = boxes[pos, 4] 197 | maxpos = pos 198 | pos = pos + 1 199 | 200 | # add max box as a detection 201 | boxes[i,0] = boxes[maxpos,0] 202 | boxes[i,1] = boxes[maxpos,1] 203 | boxes[i,2] = boxes[maxpos,2] 204 | boxes[i,3] = boxes[maxpos,3] 205 | boxes[i,4] = boxes[maxpos,4] 206 | 207 | mx1 = boxes[i, 0] * boxes[i, 5] 208 | my1 = boxes[i, 1] * boxes[i, 5] 209 | mx2 = boxes[i, 2] * boxes[i, 6] 210 | my2 = boxes[i, 3] * boxes[i, 6] 211 | mts = boxes[i, 5] 212 | mbs = boxes[i, 6] 213 | 214 | # swap ith box with position of max box 215 | boxes[maxpos,0] = tx1 216 | boxes[maxpos,1] = ty1 217 | boxes[maxpos,2] = tx2 218 | boxes[maxpos,3] = ty2 219 | boxes[maxpos,4] = ts 220 | 221 | tx1 = boxes[i,0] 222 | ty1 = boxes[i,1] 223 | tx2 = boxes[i,2] 224 | ty2 = boxes[i,3] 225 | ts = boxes[i,4] 226 | 227 | pos = i + 1 228 | # NMS iterations, note that N changes if detection boxes fall below threshold 229 | while pos < N: 230 | x1 = boxes[pos, 0] 231 | y1 = boxes[pos, 1] 232 | x2 = boxes[pos, 2] 233 | y2 = boxes[pos, 3] 234 | s = boxes[pos, 4] 235 | 236 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 237 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 238 | if iw > 0: 239 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 240 | if ih > 0: 241 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 242 | ov = iw * ih / ua #iou between max box and detection box 243 | 244 | if method == 1: # linear 245 | if ov > Nt: 246 | weight = 1 - ov 247 | else: 248 | weight = 1 249 | elif method == 2: # gaussian 250 | weight = np.exp(-(ov * ov)/sigma) 251 | else: # original NMS 252 | if ov > Nt: 253 | weight = 0 254 | else: 255 | weight = 1 256 | 257 | mw = (1 - weight) ** weight_exp 258 | mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw 259 | my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw 260 | mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw 261 | my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw 262 | mts = mts + boxes[pos, 5] * mw 263 | mbs = mbs + boxes[pos, 6] * mw 264 | 265 | boxes[pos, 4] = weight*boxes[pos, 4] 266 | 267 | # if box score falls below 
threshold, discard the box by swapping with last box 268 | # update N 269 | if boxes[pos, 4] < threshold: 270 | boxes[pos,0] = boxes[N-1, 0] 271 | boxes[pos,1] = boxes[N-1, 1] 272 | boxes[pos,2] = boxes[N-1, 2] 273 | boxes[pos,3] = boxes[N-1, 3] 274 | boxes[pos,4] = boxes[N-1, 4] 275 | N = N - 1 276 | pos = pos - 1 277 | 278 | pos = pos + 1 279 | 280 | boxes[i, 0] = mx1 / mts 281 | boxes[i, 1] = my1 / mts 282 | boxes[i, 2] = mx2 / mbs 283 | boxes[i, 3] = my2 / mbs 284 | 285 | keep = [i for i in range(N)] 286 | return keep 287 | -------------------------------------------------------------------------------- /external/nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/nms.so -------------------------------------------------------------------------------- /external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /models/CenterNet-104.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual 5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 6 | 7 | class pool(nn.Module): 8 | def __init__(self, dim, pool1, pool2): 9 | super(pool, self).__init__() 10 | self.p1_conv1 = convolution(3, dim, 128) 11 | self.p2_conv1 = convolution(3, dim, 128) 12 | 13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 14 | self.p_bn1 = nn.BatchNorm2d(dim) 15 | 16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 17 | self.bn1 = nn.BatchNorm2d(dim) 18 | self.relu1 = nn.ReLU(inplace=True) 19 | 20 | self.conv2 = convolution(3, dim, dim) 21 | 22 | self.pool1 = pool1() 23 | self.pool2 = pool2() 24 | 25 | self.look_conv1 = convolution(3, dim, 128) 26 | self.look_conv2 = convolution(3, dim, 128) 27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 29 | 30 | def forward(self, x): 31 | # pool 1 32 | look_conv1 = self.look_conv1(x) 33 | p1_conv1 = self.p1_conv1(x) 34 | look_right = self.pool2(look_conv1) 35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right) 36 | pool1 = self.pool1(P1_look_conv) 37 | 38 | # pool 2 39 | look_conv2 = self.look_conv2(x) 40 | p2_conv1 = self.p2_conv1(x) 41 | look_down = self.pool1(look_conv2) 42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down) 43 | pool2 = self.pool2(P2_look_conv) 44 | 45 | # pool 1 + pool 2 46 | p_conv1 = self.p_conv1(pool1 + pool2) 47 | p_bn1 = self.p_bn1(p_conv1) 48 | 49 | conv1 = self.conv1(x) 50 | bn1 = self.bn1(conv1) 51 | relu1 = self.relu1(p_bn1 + bn1) 52 | 53 | conv2 = self.conv2(relu1) 54 | return conv2 55 | 56 | class pool_cross(nn.Module): 57 | def __init__(self, dim, pool1, pool2, pool3, pool4): 58 | super(pool_cross, self).__init__() 59 | self.p1_conv1 = convolution(3, dim, 128) 60 | 
self.p2_conv1 = convolution(3, dim, 128) 61 | 62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 63 | self.p_bn1 = nn.BatchNorm2d(dim) 64 | 65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 66 | self.bn1 = nn.BatchNorm2d(dim) 67 | self.relu1 = nn.ReLU(inplace=True) 68 | 69 | self.conv2 = convolution(3, dim, dim) 70 | 71 | self.pool1 = pool1() 72 | self.pool2 = pool2() 73 | self.pool3 = pool3() 74 | self.pool4 = pool4() 75 | 76 | def forward(self, x): 77 | # pool 1 78 | p1_conv1 = self.p1_conv1(x) 79 | pool1 = self.pool1(p1_conv1) 80 | pool1 = self.pool3(pool1) 81 | 82 | # pool 2 83 | p2_conv1 = self.p2_conv1(x) 84 | pool2 = self.pool2(p2_conv1) 85 | pool2 = self.pool4(pool2) 86 | 87 | # pool 1 + pool 2 88 | p_conv1 = self.p_conv1(pool1 + pool2) 89 | p_bn1 = self.p_bn1(p_conv1) 90 | 91 | conv1 = self.conv1(x) 92 | bn1 = self.bn1(conv1) 93 | relu1 = self.relu1(p_bn1 + bn1) 94 | 95 | conv2 = self.conv2(relu1) 96 | return conv2 97 | 98 | class tl_pool(pool): 99 | def __init__(self, dim): 100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool) 101 | 102 | class br_pool(pool): 103 | def __init__(self, dim): 104 | super(br_pool, self).__init__(dim, BottomPool, RightPool) 105 | 106 | class center_pool(pool_cross): 107 | def __init__(self, dim): 108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool) 109 | 110 | def make_tl_layer(dim): 111 | return tl_pool(dim) 112 | 113 | def make_br_layer(dim): 114 | return br_pool(dim) 115 | 116 | def make_ct_layer(dim): 117 | return center_pool(dim) 118 | 119 | def make_pool_layer(dim): 120 | return nn.Sequential() 121 | 122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 123 | layers = [layer(kernel, dim0, dim1, stride=2)] 124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 125 | return nn.Sequential(*layers) 126 | 127 | class model(kp): 128 | def __init__(self, db): 129 | n = 5 130 | dims = [256, 256, 384, 384, 384, 512] 131 | modules = [2, 2, 2, 2, 2, 4] 132 | out_dim = 1 133 | 134 | super(model, self).__init__( 135 | db, n, 2, dims, modules, out_dim, 136 | make_tl_layer=make_tl_layer, 137 | make_br_layer=make_br_layer, 138 | make_ct_layer=make_ct_layer, 139 | make_pool_layer=make_pool_layer, 140 | make_hg_layer=make_hg_layer, 141 | kp_layer=residual, cnv_dim=256 142 | ) 143 | 144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss) 145 | -------------------------------------------------------------------------------- /models/CenterNet-52.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual 5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 6 | 7 | class pool(nn.Module): 8 | def __init__(self, dim, pool1, pool2): 9 | super(pool, self).__init__() 10 | self.p1_conv1 = convolution(3, dim, 128) 11 | self.p2_conv1 = convolution(3, dim, 128) 12 | 13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 14 | self.p_bn1 = nn.BatchNorm2d(dim) 15 | 16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 17 | self.bn1 = nn.BatchNorm2d(dim) 18 | self.relu1 = nn.ReLU(inplace=True) 19 | 20 | self.conv2 = convolution(3, dim, dim) 21 | 22 | self.pool1 = pool1() 23 | self.pool2 = pool2() 24 | 25 | self.look_conv1 = convolution(3, dim, 128) 26 | self.look_conv2 = convolution(3, dim, 128) 27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), 
bias=False) 28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 29 | 30 | def forward(self, x): 31 | # pool 1 32 | look_conv1 = self.look_conv1(x) 33 | p1_conv1 = self.p1_conv1(x) 34 | look_right = self.pool2(look_conv1) 35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right) 36 | pool1 = self.pool1(P1_look_conv) 37 | 38 | # pool 2 39 | look_conv2 = self.look_conv2(x) 40 | p2_conv1 = self.p2_conv1(x) 41 | look_down = self.pool1(look_conv2) 42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down) 43 | pool2 = self.pool2(P2_look_conv) 44 | 45 | # pool 1 + pool 2 46 | p_conv1 = self.p_conv1(pool1 + pool2) 47 | p_bn1 = self.p_bn1(p_conv1) 48 | 49 | conv1 = self.conv1(x) 50 | bn1 = self.bn1(conv1) 51 | relu1 = self.relu1(p_bn1 + bn1) 52 | 53 | conv2 = self.conv2(relu1) 54 | return conv2 55 | 56 | class pool_cross(nn.Module): 57 | def __init__(self, dim, pool1, pool2, pool3, pool4): 58 | super(pool_cross, self).__init__() 59 | self.p1_conv1 = convolution(3, dim, 128) 60 | self.p2_conv1 = convolution(3, dim, 128) 61 | 62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 63 | self.p_bn1 = nn.BatchNorm2d(dim) 64 | 65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 66 | self.bn1 = nn.BatchNorm2d(dim) 67 | self.relu1 = nn.ReLU(inplace=True) 68 | 69 | self.conv2 = convolution(3, dim, dim) 70 | 71 | self.pool1 = pool1() 72 | self.pool2 = pool2() 73 | self.pool3 = pool3() 74 | self.pool4 = pool4() 75 | 76 | def forward(self, x): 77 | # pool 1 78 | p1_conv1 = self.p1_conv1(x) 79 | pool1 = self.pool1(p1_conv1) 80 | pool1 = self.pool3(pool1) 81 | 82 | # pool 2 83 | p2_conv1 = self.p2_conv1(x) 84 | pool2 = self.pool2(p2_conv1) 85 | pool2 = self.pool4(pool2) 86 | 87 | # pool 1 + pool 2 88 | p_conv1 = self.p_conv1(pool1 + pool2) 89 | p_bn1 = self.p_bn1(p_conv1) 90 | 91 | conv1 = self.conv1(x) 92 | bn1 = self.bn1(conv1) 93 | relu1 = self.relu1(p_bn1 + bn1) 94 | 95 | conv2 = self.conv2(relu1) 96 | return conv2 97 | 98 | class tl_pool(pool): 99 | def __init__(self, dim): 100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool) 101 | 102 | class br_pool(pool): 103 | def __init__(self, dim): 104 | super(br_pool, self).__init__(dim, BottomPool, RightPool) 105 | 106 | class center_pool(pool_cross): 107 | def __init__(self, dim): 108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool) 109 | 110 | def make_tl_layer(dim): 111 | return tl_pool(dim) 112 | 113 | def make_br_layer(dim): 114 | return br_pool(dim) 115 | 116 | def make_ct_layer(dim): 117 | return center_pool(dim) 118 | 119 | def make_pool_layer(dim): 120 | return nn.Sequential() 121 | 122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 123 | layers = [layer(kernel, dim0, dim1, stride=2)] 124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 125 | return nn.Sequential(*layers) 126 | 127 | class model(kp): 128 | def __init__(self, db): 129 | n = 5 130 | dims = [256, 256, 384, 384, 384, 512] 131 | modules = [2, 2, 2, 2, 2, 4] 132 | out_dim = 1 # category num 133 | 134 | super(model, self).__init__( 135 | db, n, 1, dims, modules, out_dim, 136 | make_tl_layer=make_tl_layer, 137 | make_br_layer=make_br_layer, 138 | make_ct_layer=make_ct_layer, 139 | make_pool_layer=make_pool_layer, 140 | make_hg_layer=make_hg_layer, 141 | kp_layer=residual, cnv_dim=256 142 | ) 143 | 144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss) 145 | -------------------------------------------------------------------------------- 
/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/models/__init__.py -------------------------------------------------------------------------------- /models/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .kp import kp, AELoss 2 | from .kp_utils import _neg_loss 3 | 4 | from .utils import convolution, fully_connected, residual 5 | 6 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 7 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | import sys 6 | import os 7 | sys.path.append(os.path.join(os.path.dirname(__file__),'dist/cpools-0.0.0-py3.6-linux-x86_64.egg')) 8 | import top_pool, bottom_pool, left_pool, right_pool 9 | 10 | class TopPoolFunction(Function): 11 | @staticmethod 12 | def forward(ctx, input): 13 | output = top_pool.forward(input)[0] 14 | ctx.save_for_backward(input) 15 | return output 16 | 17 | @staticmethod 18 | def backward(ctx, grad_output): 19 | input = ctx.saved_variables[0] 20 | output = top_pool.backward(input, grad_output)[0] 21 | return output 22 | 23 | class BottomPoolFunction(Function): 24 | @staticmethod 25 | def forward(ctx, input): 26 | output = bottom_pool.forward(input)[0] 27 | ctx.save_for_backward(input) 28 | return output 29 | 30 | @staticmethod 31 | def backward(ctx, grad_output): 32 | input = ctx.saved_variables[0] 33 | output = bottom_pool.backward(input, grad_output)[0] 34 | return output 35 | 36 | class LeftPoolFunction(Function): 37 | @staticmethod 38 | def forward(ctx, input): 39 | output = left_pool.forward(input)[0] 40 | ctx.save_for_backward(input) 41 | return output 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | input = ctx.saved_variables[0] 46 | output = left_pool.backward(input, grad_output)[0] 47 | return output 48 | 49 | class RightPoolFunction(Function): 50 | @staticmethod 51 | def forward(ctx, input): 52 | output = right_pool.forward(input)[0] 53 | ctx.save_for_backward(input) 54 | return output 55 | 56 | @staticmethod 57 | def backward(ctx, grad_output): 58 | input = ctx.saved_variables[0] 59 | output = right_pool.backward(input, grad_output)[0] 60 | return output 61 | 62 | class TopPool(nn.Module): 63 | def forward(self, x): 64 | return TopPoolFunction.apply(x) 65 | 66 | class BottomPool(nn.Module): 67 | def forward(self, x): 68 | return BottomPoolFunction.apply(x) 69 | 70 | class LeftPool(nn.Module): 71 | def forward(self, x): 72 | return LeftPoolFunction.apply(x) 73 | 74 | class RightPool(nn.Module): 75 | def forward(self, x): 76 | return RightPoolFunction.apply(x) 77 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | 
ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | // Copy the first row 15 | at::Tensor input_temp = input.select(2, 0); 16 | at::Tensor output_temp = output.select(2, 0); 17 | output_temp.copy_(input_temp); 18 | 19 | at::Tensor max_temp; 20 | for (int64_t ind = 0; ind < height - 1; ++ind) { 21 | input_temp = input.select(2, ind + 1); 22 | output_temp = output.select(2, ind); 23 | max_temp = output.select(2, ind + 1); 24 | 25 | at::max_out(max_temp, input_temp, output_temp); 26 | } 27 | 28 | return { 29 | output 30 | }; 31 | } 32 | 33 | std::vector<at::Tensor> pool_backward( 34 | at::Tensor input, 35 | at::Tensor grad_output 36 | ) { 37 | auto output = at::zeros_like(input); 38 | 39 | int32_t batch = input.size(0); 40 | int32_t channel = input.size(1); 41 | int32_t height = input.size(2); 42 | int32_t width = input.size(3); 43 | 44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width}); 45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width}); 46 | 47 | auto input_temp = input.select(2, 0); 48 | max_val.copy_(input_temp); 49 | 50 | max_ind.fill_(0); 51 | 52 | auto output_temp = output.select(2, 0); 53 | auto grad_output_temp = grad_output.select(2, 0); 54 | output_temp.copy_(grad_output_temp); 55 | 56 | auto un_max_ind = max_ind.unsqueeze(2); 57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width}); 58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width}); 59 | for (int32_t ind = 0; ind < height - 1; ++ind) { 60 | input_temp = input.select(2, ind + 1); 61 | at::gt_out(gt_mask, input_temp, max_val); 62 | 63 | at::masked_select_out(max_temp, input_temp, gt_mask); 64 | max_val.masked_scatter_(gt_mask, max_temp); 65 | max_ind.masked_fill_(gt_mask, ind + 1); 66 | 67 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 68 | output.scatter_add_(2, un_max_ind, grad_output_temp); 69 | } 70 | 71 | return { 72 | output 73 | }; 74 | } 75 | 76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 77 | m.def( 78 | "forward", &pool_forward, "Bottom Pool Forward", 79 | py::call_guard<py::gil_scoped_release>() 80 | ); 81 | m.def( 82 | "backward", &pool_backward, "Bottom Pool Backward", 83 | py::call_guard<py::gil_scoped_release>() 84 | ); 85 | } 86 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | // Copy the last column 15 | at::Tensor input_temp = input.select(3, width - 1); 16 | at::Tensor output_temp = output.select(3, width - 1); 17 | output_temp.copy_(input_temp); 18 | 19 | at::Tensor 
20 |     for (int64_t ind = 1; ind < width; ++ind) {
21 |         input_temp = input.select(3, width - ind - 1);
22 |         output_temp = output.select(3, width - ind);
23 |         max_temp = output.select(3, width - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, width - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(width - 1);
51 | 
52 |     auto output_temp = output.select(3, width - 1);
53 |     auto grad_output_temp = grad_output.select(3, width - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 1; ind < width; ++ind) {
60 |         input_temp = input.select(3, width - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, width - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Left Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Left Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/_cpools/src/right_pool.cpp: --------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the first column (the scan runs left-to-right along the width axis)
15 |     at::Tensor input_temp = input.select(3, 0);
16 |     at::Tensor output_temp = output.select(3, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < width - 1; ++ind) {
21 |         input_temp = input.select(3, ind + 1);
22 |         output_temp = output.select(3, ind);
23 |         max_temp = output.select(3, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     at::Tensor output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
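    // NOTE: the backward pass recomputes the forward argmax on the fly:
    // max_val / max_ind hold, per (batch, channel, row), the running maximum
    // and the column where it was attained; each grad_output column is then
    // scatter_add-ed into `output` at its current argmax column, so gradients
    // flow only to the input positions that produced the pooled maxima.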
52 |     auto output_temp = output.select(3, 0);
53 |     auto grad_output_temp = grad_output.select(3, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 0; ind < width - 1; ++ind) {
60 |         input_temp = input.select(3, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Right Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Right Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/_cpools/src/top_pool.cpp: --------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> top_pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the last row (the scan runs bottom-to-top along the height axis)
15 |     at::Tensor input_temp = input.select(2, height - 1);
16 |     at::Tensor output_temp = output.select(2, height - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < height; ++ind) {
21 |         input_temp = input.select(2, height - ind - 1);
22 |         output_temp = output.select(2, height - ind);
23 |         max_temp = output.select(2, height - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> top_pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, height - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(height - 1);
51 | 
52 |     auto output_temp = output.select(2, height - 1);
53 |     auto grad_output_temp = grad_output.select(2, height - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 1; ind < height; ++ind) {
60 |         input_temp = input.select(2, height - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, height - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
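// Summary of the four kernels in this directory: each is a directional
// running maximum over the feature map,
//   top pool:    output[i] = max(input[i..H-1])   (scans bottom-to-top)
//   bottom pool: output[i] = max(input[0..i])     (scans top-to-bottom)
//   left pool:   output[j] = max(input[j..W-1])   (scans right-to-left)
//   right pool:  output[j] = max(input[0..j])     (scans left-to-right)
// The corner heads combine them (e.g. top + left pooling for the top-left
// corner) so every location can see object evidence beyond itself.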
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &top_pool_forward, "Top Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &top_pool_backward, "Top Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/data_parallel.py: --------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 | 
7 | from .scatter_gather import scatter_kwargs
8 | 
9 | class DataParallel(Module):
10 |     r"""Implements data parallelism at the module level.
11 | 
12 |     This container parallelizes the application of the given module by
13 |     splitting the input across the specified devices by chunking in the batch
14 |     dimension. In the forward pass, the module is replicated on each device,
15 |     and each replica handles a portion of the input. During the backwards
16 |     pass, gradients from each replica are summed into the original module.
17 | 
18 |     The batch size should be larger than the number of GPUs used. It should
19 |     also be an integer multiple of the number of GPUs so that each chunk is the
20 |     same size (so that each GPU processes the same number of samples).
21 | 
22 |     See also: :ref:`cuda-nn-dataparallel-instead`
23 | 
24 |     Arbitrary positional and keyword inputs are allowed to be passed into
25 |     DataParallel EXCEPT Tensors. All variables will be scattered on dim
26 |     specified (default 0). Primitive types will be broadcasted, but all
27 |     other types will be a shallow copy and can be corrupted if written to in
28 |     the model's forward pass.
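    Unlike the stock ``torch.nn.DataParallel``, this variant also accepts an
    optional ``chunk_sizes`` argument (forwarded to ``scatter_kwargs`` in
    ``scatter_gather.py``), so the batch can be split unevenly across GPUs;
    ``nnet/py_factory.py`` passes ``system_configs.chunk_sizes`` for exactly
    this purpose.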
29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /models/py_utils/kp.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import torch 3 | 4 | import numpy as np 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from .utils import convolution, residual 9 | from .utils import make_layer, make_layer_revr 10 | 11 | from .kp_utils import _tranpose_and_gather_feat, _decode 12 | from .kp_utils import _sigmoid, _ae_loss, _regr_loss, _neg_loss 13 | from .kp_utils import make_tl_layer, make_br_layer, make_kp_layer, make_ct_layer 14 | from .kp_utils import make_pool_layer, make_unpool_layer 15 | from .kp_utils import make_merge_layer, make_inter_layer, make_cnv_layer 16 | 17 | 18 | class kp_module(nn.Module): 19 | def __init__( 20 | self, n, dims, modules, layer=residual, 21 | make_up_layer=make_layer, make_low_layer=make_layer, 22 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 23 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 24 | make_merge_layer=make_merge_layer, **kwargs 25 | ): 26 | super(kp_module, self).__init__() 27 | 28 | self.n = n 29 | 30 | curr_mod = modules[0] 31 | next_mod = modules[1] 32 | 33 | curr_dim = dims[0] 34 | next_dim = dims[1] 35 | 36 | self.up1 = make_up_layer( 37 | 3, curr_dim, curr_dim, curr_mod, 38 | layer=layer, **kwargs 39 | ) 40 | self.max1 = make_pool_layer(curr_dim) 41 | self.low1 = make_hg_layer( 42 | 3, curr_dim, next_dim, curr_mod, 43 | layer=layer, **kwargs 44 | ) 45 | self.low2 = kp_module( 46 | n - 1, dims[1:], modules[1:], layer=layer, 47 | make_up_layer=make_up_layer, 48 | make_low_layer=make_low_layer, 49 | make_hg_layer=make_hg_layer, 50 | make_hg_layer_revr=make_hg_layer_revr, 51 | make_pool_layer=make_pool_layer, 52 | make_unpool_layer=make_unpool_layer, 53 | make_merge_layer=make_merge_layer, 54 | **kwargs 55 | ) if self.n > 1 else \ 56 | make_low_layer( 57 | 3, next_dim, next_dim, next_mod, 58 | layer=layer, **kwargs 59 | ) 60 | self.low3 = make_hg_layer_revr( 61 | 3, next_dim, curr_dim, curr_mod, 62 | layer=layer, **kwargs 63 | ) 64 | self.up2 = make_unpool_layer(curr_dim) 65 | 66 | self.merge = make_merge_layer(curr_dim) 67 | 68 | def forward(self, x): 69 | up1 = self.up1(x) 70 | max1 = self.max1(x) 71 | low1 = self.low1(max1) 72 | low2 = self.low2(low1) 73 | low3 = self.low3(low2) 74 | up2 = self.up2(low3) 75 | return self.merge(up1, up2) 76 | 77 | 78 | class kp(nn.Module): 79 | def __init__( 80 | self, db, n, nstack, dims, modules, out_dim, pre=None, cnv_dim=256, 81 | make_tl_layer=make_tl_layer, make_br_layer=make_br_layer, make_ct_layer=make_ct_layer, 82 | make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer, 83 | 
make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer, 84 | make_up_layer=make_layer, make_low_layer=make_layer, 85 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 86 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 87 | make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, 88 | kp_layer=residual 89 | ): 90 | super(kp, self).__init__() 91 | 92 | self.nstack = nstack 93 | self._decode = _decode 94 | self._db = db 95 | self.K = self._db.configs["top_k"] 96 | self.ae_threshold = self._db.configs["ae_threshold"] 97 | self.kernel = self._db.configs["nms_kernel"] 98 | self.input_size = self._db.configs["input_size"][0] 99 | self.output_size = self._db.configs["output_sizes"][0][0] 100 | 101 | curr_dim = dims[0] 102 | 103 | self.pre = nn.Sequential( 104 | convolution(7, 3, 128, stride=2), 105 | residual(3, 128, 256, stride=2) 106 | ) if pre is None else pre 107 | 108 | self.kps = nn.ModuleList([ 109 | kp_module( 110 | n, dims, modules, layer=kp_layer, 111 | make_up_layer=make_up_layer, 112 | make_low_layer=make_low_layer, 113 | make_hg_layer=make_hg_layer, 114 | make_hg_layer_revr=make_hg_layer_revr, 115 | make_pool_layer=make_pool_layer, 116 | make_unpool_layer=make_unpool_layer, 117 | make_merge_layer=make_merge_layer 118 | ) for _ in range(nstack) 119 | ]) 120 | self.cnvs = nn.ModuleList([ 121 | make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack) 122 | ]) 123 | 124 | self.tl_cnvs = nn.ModuleList([ 125 | make_tl_layer(cnv_dim) for _ in range(nstack) 126 | ]) 127 | self.br_cnvs = nn.ModuleList([ 128 | make_br_layer(cnv_dim) for _ in range(nstack) 129 | ]) 130 | 131 | self.ct_cnvs = nn.ModuleList([ 132 | make_ct_layer(cnv_dim) for _ in range(nstack) 133 | ]) 134 | 135 | ## keypoint heatmaps 136 | self.tl_heats = nn.ModuleList([ 137 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 138 | ]) 139 | self.br_heats = nn.ModuleList([ 140 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 141 | ]) 142 | 143 | self.ct_heats = nn.ModuleList([ 144 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 145 | ]) 146 | 147 | ## tags 148 | self.tl_tags = nn.ModuleList([ 149 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack) 150 | ]) 151 | self.br_tags = nn.ModuleList([ 152 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack) 153 | ]) 154 | 155 | for tl_heat, br_heat, ct_heat in zip(self.tl_heats, self.br_heats, self.ct_heats): 156 | tl_heat[-1].bias.data.fill_(-2.19) 157 | br_heat[-1].bias.data.fill_(-2.19) 158 | ct_heat[-1].bias.data.fill_(-2.19) 159 | 160 | self.inters = nn.ModuleList([ 161 | make_inter_layer(curr_dim) for _ in range(nstack - 1) 162 | ]) 163 | 164 | self.inters_ = nn.ModuleList([ 165 | nn.Sequential( 166 | nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False), 167 | nn.BatchNorm2d(curr_dim) 168 | ) for _ in range(nstack - 1) 169 | ]) 170 | self.cnvs_ = nn.ModuleList([ 171 | nn.Sequential( 172 | nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 173 | nn.BatchNorm2d(curr_dim) 174 | ) for _ in range(nstack - 1) 175 | ]) 176 | 177 | self.tl_regrs = nn.ModuleList([ 178 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 179 | ]) 180 | self.br_regrs = nn.ModuleList([ 181 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 182 | ]) 183 | self.ct_regrs = nn.ModuleList([ 184 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 185 | ]) 186 | 187 | self.relu = nn.ReLU(inplace=True) 188 | 189 | def _train(self, *xs): 190 | image = xs[0] 191 
|         tl_inds = xs[1]
192 |         br_inds = xs[2]
193 |         ct_inds = xs[3]
194 | 
195 |         inter = self.pre(image)
196 |         outs = []
197 | 
198 |         layers = zip(
199 |             self.kps, self.cnvs,
200 |             self.tl_cnvs, self.br_cnvs,
201 |             self.ct_cnvs, self.tl_heats,
202 |             self.br_heats, self.ct_heats,
203 |             self.tl_tags, self.br_tags,
204 |             self.tl_regrs, self.br_regrs,
205 |             self.ct_regrs
206 |         )
207 |         for ind, layer in enumerate(layers):
208 |             kp_, cnv_ = layer[0:2]
209 |             tl_cnv_, br_cnv_ = layer[2:4]
210 |             ct_cnv_, tl_heat_ = layer[4:6]
211 |             br_heat_, ct_heat_ = layer[6:8]
212 |             tl_tag_, br_tag_ = layer[8:10]
213 |             tl_regr_, br_regr_ = layer[10:12]
214 |             ct_regr_ = layer[12]
215 | 
216 |             kp = kp_(inter)
217 |             cnv = cnv_(kp)
218 | 
219 |             tl_cnv = tl_cnv_(cnv)
220 |             br_cnv = br_cnv_(cnv)
221 |             ct_cnv = ct_cnv_(cnv)
222 | 
223 |             tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
224 |             tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
225 |             tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
226 | 
227 |             tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds)
228 |             br_tag = _tranpose_and_gather_feat(br_tag, br_inds)
229 |             tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds)
230 |             br_regr = _tranpose_and_gather_feat(br_regr, br_inds)
231 |             ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds)
232 |             # tl_tag and br_tag are per-corner embedding scalars, gathered at the ground-truth corner indices, used to group matching top-left and bottom-right corners
233 | 
234 |             outs += [tl_heat, br_heat, ct_heat, tl_tag, br_tag, tl_regr, br_regr, ct_regr]
235 | 
236 |             if ind < self.nstack - 1:
237 |                 inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
238 |                 inter = self.relu(inter)
239 |                 inter = self.inters[ind](inter)
240 | 
241 |         return outs
242 | 
243 |     def _test(self, *xs, **kwargs):
244 |         image = xs[0]
245 | 
246 |         inter = self.pre(image)
247 | 
248 |         outs = []
249 | 
250 |         layers = zip(
251 |             self.kps, self.cnvs,
252 |             self.tl_cnvs, self.br_cnvs,
253 |             self.ct_cnvs, self.tl_heats,
254 |             self.br_heats, self.ct_heats,
255 |             self.tl_tags, self.br_tags,
256 |             self.tl_regrs, self.br_regrs,
257 |             self.ct_regrs
258 |         )
259 |         for ind, layer in enumerate(layers):
260 |             kp_, cnv_ = layer[0:2]
261 |             tl_cnv_, br_cnv_ = layer[2:4]
262 |             ct_cnv_, tl_heat_ = layer[4:6]
263 |             br_heat_, ct_heat_ = layer[6:8]
264 |             tl_tag_, br_tag_ = layer[8:10]
265 |             tl_regr_, br_regr_ = layer[10:12]
266 |             ct_regr_ = layer[12]
267 | 
268 |             kp = kp_(inter)
269 |             cnv = cnv_(kp)
270 | 
271 |             if ind == self.nstack - 1:  # only the last hourglass stack feeds the detection heads at test time
272 |                 tl_cnv = tl_cnv_(cnv)
273 |                 br_cnv = br_cnv_(cnv)
274 |                 ct_cnv = ct_cnv_(cnv)
275 | 
276 |                 tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
277 |                 tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
278 |                 tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
279 | 
280 |                 outs += [tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr,
281 |                          ct_heat, ct_regr]
282 | 
283 |             if ind < self.nstack - 1:
284 |                 inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
285 |                 inter = self.relu(inter)
286 |                 inter = self.inters[ind](inter)
287 | 
288 |         return self._decode(*outs[-8:], **kwargs)
289 | 
290 |     def forward(self, *xs, **kwargs):
291 |         if len(xs) > 1:
292 |             return self._train(*xs, **kwargs)
293 |         return self._test(*xs, **kwargs)
294 | 
295 | 
296 | class AELoss(nn.Module):
297 |     def __init__(self, pull_weight=1, push_weight=1, regr_weight=1, focal_loss=_neg_loss):
298 |         super(AELoss, self).__init__()
299 | 
300 |         self.pull_weight = pull_weight
301 |         self.push_weight = push_weight
302 |         self.regr_weight = regr_weight
303 |         self.focal_loss
= focal_loss 304 | self.ae_loss = _ae_loss 305 | self.regr_loss = _regr_loss 306 | 307 | def forward(self, outs, targets): 308 | stride = 8 309 | 310 | tl_heats = outs[0::stride] 311 | br_heats = outs[1::stride] 312 | ct_heats = outs[2::stride] 313 | tl_tags = outs[3::stride] 314 | br_tags = outs[4::stride] 315 | tl_regrs = outs[5::stride] 316 | br_regrs = outs[6::stride] 317 | ct_regrs = outs[7::stride] 318 | 319 | gt_tl_heat = targets[0] 320 | gt_br_heat = targets[1] 321 | gt_ct_heat = targets[2] 322 | gt_mask = targets[3] 323 | gt_tl_regr = targets[4] 324 | gt_br_regr = targets[5] 325 | gt_ct_regr = targets[6] 326 | 327 | # focal loss 328 | focal_loss = 0 329 | 330 | tl_heats = [_sigmoid(t) for t in tl_heats] 331 | br_heats = [_sigmoid(b) for b in br_heats] 332 | ct_heats = [_sigmoid(c) for c in ct_heats] 333 | 334 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 335 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 336 | focal_loss += self.focal_loss(ct_heats, gt_ct_heat) 337 | 338 | # tag loss 339 | pull_loss = 0 340 | push_loss = 0 341 | 342 | for tl_tag, br_tag in zip(tl_tags, br_tags): 343 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 344 | pull_loss += pull 345 | push_loss += push 346 | pull_loss = self.pull_weight * pull_loss 347 | push_loss = self.push_weight * push_loss 348 | 349 | regr_loss = 0 350 | for tl_regr, br_regr, ct_regr in zip(tl_regrs, br_regrs, ct_regrs): 351 | regr_loss += self.regr_loss(tl_regr, gt_tl_regr, gt_mask) 352 | regr_loss += self.regr_loss(br_regr, gt_br_regr, gt_mask) 353 | regr_loss += self.regr_loss(ct_regr, gt_ct_regr, gt_mask) 354 | regr_loss = self.regr_weight * regr_loss 355 | 356 | loss = (focal_loss + pull_loss + push_loss + regr_loss) / len(tl_heats) 357 | return loss.unsqueeze(0), (focal_loss / len(tl_heats)).unsqueeze(0), (pull_loss / len(tl_heats)).unsqueeze(0), ( 358 | push_loss / len(tl_heats)).unsqueeze(0), (regr_loss / len(tl_heats)).unsqueeze(0) 359 | -------------------------------------------------------------------------------- /models/py_utils/kp_utils.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .utils import convolution, residual 6 | 7 | 8 | class MergeUp(nn.Module): 9 | def forward(self, up1, up2): 10 | return up1 + up2 11 | 12 | 13 | def make_merge_layer(dim): 14 | return MergeUp() 15 | 16 | 17 | def make_tl_layer(dim): 18 | return None 19 | 20 | 21 | def make_br_layer(dim): 22 | return None 23 | 24 | 25 | def make_ct_layer(dim): 26 | return None 27 | 28 | 29 | def make_pool_layer(dim): 30 | return nn.MaxPool2d(kernel_size=2, stride=2) 31 | 32 | 33 | def make_unpool_layer(dim): 34 | return nn.Upsample(scale_factor=2) 35 | 36 | 37 | def make_kp_layer(cnv_dim, curr_dim, out_dim): 38 | return nn.Sequential( 39 | convolution(3, cnv_dim, curr_dim, with_bn=False), 40 | nn.Conv2d(curr_dim, out_dim, (1, 1)) 41 | ) 42 | 43 | 44 | def make_inter_layer(dim): 45 | return residual(3, dim, dim) 46 | 47 | 48 | def make_cnv_layer(inp_dim, out_dim): 49 | return convolution(3, inp_dim, out_dim) 50 | 51 | 52 | def _gather_feat(feat, ind, mask=None): 53 | dim = feat.size(2) 54 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 55 | feat = feat.gather(1, ind) 56 | if mask is not None: 57 | mask = mask.unsqueeze(2).expand_as(feat) 58 | feat = feat[mask] 59 | feat = feat.view(-1, dim) 60 | return feat 61 | 62 | 63 | def _nms(heat, kernel=1): 64 | pad = (kernel - 1) // 2 65 | 66 | hmax = 
nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 67 | keep = (hmax == heat).float() 68 | return heat * keep 69 | 70 | 71 | def _tranpose_and_gather_feat(feat, ind): 72 | feat = feat.permute(0, 2, 3, 1).contiguous() 73 | feat = feat.view(feat.size(0), -1, feat.size(3)) 74 | feat = _gather_feat(feat, ind) 75 | return feat 76 | 77 | 78 | def _topk(scores, K=20): 79 | batch, cat, height, width = scores.size() 80 | 81 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 82 | 83 | topk_clses = (topk_inds / (height * width)).int() 84 | 85 | topk_inds = topk_inds % (height * width) 86 | topk_ys = (topk_inds / width).int().float() 87 | topk_xs = (topk_inds % width).int().float() 88 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 89 | 90 | 91 | def _decode( 92 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, ct_heat, ct_regr, 93 | K=100, kernel=1, ae_threshold=1, num_dets=1000 94 | ): 95 | batch, cat, height, width = tl_heat.size() 96 | 97 | tl_heat = torch.sigmoid(tl_heat) 98 | br_heat = torch.sigmoid(br_heat) 99 | ct_heat = torch.sigmoid(ct_heat) 100 | 101 | # perform nms on heatmaps 102 | tl_heat = _nms(tl_heat, kernel=kernel) 103 | br_heat = _nms(br_heat, kernel=kernel) 104 | ct_heat = _nms(ct_heat, kernel=kernel) 105 | 106 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 107 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 108 | ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = _topk(ct_heat, K=K) 109 | 110 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 111 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 112 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 113 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 114 | ct_ys = ct_ys.view(batch, 1, K).expand(batch, K, K) 115 | ct_xs = ct_xs.view(batch, 1, K).expand(batch, K, K) 116 | 117 | if tl_regr is not None and br_regr is not None: 118 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 119 | tl_regr = tl_regr.view(batch, K, 1, 2) 120 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 121 | br_regr = br_regr.view(batch, 1, K, 2) 122 | ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds) 123 | ct_regr = ct_regr.view(batch, 1, K, 2) 124 | 125 | tl_xs = tl_xs + tl_regr[..., 0] 126 | tl_ys = tl_ys + tl_regr[..., 1] 127 | br_xs = br_xs + br_regr[..., 0] 128 | br_ys = br_ys + br_regr[..., 1] 129 | ct_xs = ct_xs + ct_regr[..., 0] 130 | ct_ys = ct_ys + ct_regr[..., 1] 131 | 132 | # all possible boxes based on top k corners (ignoring class) 133 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 134 | 135 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 136 | tl_tag = tl_tag.view(batch, K, 1) 137 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 138 | br_tag = br_tag.view(batch, 1, K) 139 | dists = torch.abs(tl_tag - br_tag) 140 | 141 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 142 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 143 | scores = (tl_scores + br_scores) / 2 144 | 145 | # reject boxes based on classes 146 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 147 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 148 | cls_inds = (tl_clses != br_clses) 149 | 150 | # reject boxes based on distances 151 | dist_inds = (dists > ae_threshold) 152 | 153 | # reject boxes based on widths and heights 154 | width_inds = (br_xs < tl_xs) 155 | height_inds = (br_ys < tl_ys) 156 | 157 | scores[cls_inds] = -1 158 | scores[dist_inds] = -1 159 | scores[width_inds] = -1 160 | 
scores[height_inds] = -1 161 | 162 | scores = scores.view(batch, -1) 163 | scores, inds = torch.topk(scores, num_dets) 164 | scores = scores.unsqueeze(2) 165 | 166 | bboxes = bboxes.view(batch, -1, 4) 167 | bboxes = _gather_feat(bboxes, inds) 168 | 169 | # width = (bboxes[:,:,2] - bboxes[:,:,0]).unsqueeze(2) 170 | # height = (bboxes[:,:,2] - bboxes[:,:,0]).unsqueeze(2) 171 | 172 | clses = tl_clses.contiguous().view(batch, -1, 1) 173 | clses = _gather_feat(clses, inds).float() 174 | 175 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 176 | tl_scores = _gather_feat(tl_scores, inds).float() 177 | br_scores = br_scores.contiguous().view(batch, -1, 1) 178 | br_scores = _gather_feat(br_scores, inds).float() 179 | 180 | ct_xs = ct_xs[:, 0, :] 181 | ct_ys = ct_ys[:, 0, :] 182 | 183 | center = torch.cat([ct_xs.unsqueeze(2), ct_ys.unsqueeze(2), ct_clses.float().unsqueeze(2), ct_scores.unsqueeze(2)], 184 | dim=2) 185 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 186 | return detections, center 187 | 188 | 189 | def _neg_loss(preds, gt): 190 | pos_inds = gt.eq(1) 191 | neg_inds = gt.lt(1) 192 | 193 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 194 | 195 | loss = 0 196 | for pred in preds: 197 | pos_pred = pred[pos_inds] 198 | neg_pred = pred[neg_inds] 199 | 200 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 201 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 202 | 203 | num_pos = pos_inds.float().sum() 204 | pos_loss = pos_loss.sum() 205 | neg_loss = neg_loss.sum() 206 | 207 | if pos_pred.nelement() == 0: 208 | loss = loss - neg_loss 209 | else: 210 | loss = loss - (pos_loss + neg_loss) / num_pos 211 | return loss 212 | 213 | 214 | def _sigmoid(x): 215 | x = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) 216 | return x 217 | 218 | 219 | def _ae_loss(tag0, tag1, mask): 220 | num = mask.sum(dim=1, keepdim=True).float() 221 | tag0 = tag0.squeeze() 222 | tag1 = tag1.squeeze() 223 | 224 | tag_mean = (tag0 + tag1) / 2 225 | 226 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 227 | tag0 = tag0[mask].sum() 228 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 229 | tag1 = tag1[mask].sum() 230 | pull = tag0 + tag1 231 | 232 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 233 | mask = mask.eq(2) 234 | num = num.unsqueeze(2) 235 | num2 = (num - 1) * num 236 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 237 | dist = 1 - torch.abs(dist) 238 | dist = nn.functional.relu(dist, inplace=True) 239 | dist = dist - 1 / (num + 1e-4) 240 | dist = dist / (num2 + 1e-4) 241 | dist = dist[mask] 242 | push = dist.sum() 243 | return pull, push 244 | 245 | 246 | def _regr_loss(regr, gt_regr, mask): 247 | num = mask.float().sum() 248 | mask = mask.unsqueeze(2).expand_as(gt_regr) 249 | 250 | regr = regr[mask] 251 | gt_regr = gt_regr[mask] 252 | 253 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 254 | regr_loss = regr_loss / (num + 1e-4) 255 | return regr_loss 256 | -------------------------------------------------------------------------------- /models/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. 
Duplicates
10 |     references to objects that are not variables. Does not
11 |     support Tensors.
12 |     """
13 |     def scatter_map(obj):
14 |         if isinstance(obj, Variable):
15 |             return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
16 |         assert not torch.is_tensor(obj), "Tensors not supported in scatter."
17 |         if isinstance(obj, tuple):
18 |             return list(zip(*map(scatter_map, obj)))
19 |         if isinstance(obj, list):
20 |             return list(map(list, zip(*map(scatter_map, obj))))
21 |         if isinstance(obj, dict):
22 |             return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
23 |         return [obj for targets in target_gpus]
24 | 
25 |     return scatter_map(inputs)
26 | 
27 | 
28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
29 |     r"""Scatter with support for kwargs dictionary"""
30 |     inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
31 |     kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
32 |     if len(inputs) < len(kwargs):
33 |         inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
34 |     elif len(kwargs) < len(inputs):
35 |         kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
36 |     inputs = tuple(inputs)
37 |     kwargs = tuple(kwargs)
38 |     return inputs, kwargs
39 | 
-------------------------------------------------------------------------------- /models/py_utils/utils.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | # a convolution module containing a convolution layer, an optional BN layer and a ReLU activation;
5 | # with stride 1, the spatial sizes of the input and output are the same
6 | class convolution(nn.Module):
7 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
8 |         super(convolution, self).__init__()
9 | 
10 |         pad = (k - 1) // 2
11 |         self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
12 |         self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
13 |         self.relu = nn.ReLU(inplace=True)
14 | 
15 |     def forward(self, x):
16 |         conv = self.conv(x)
17 |         bn = self.bn(conv)
18 |         relu = self.relu(bn)
19 |         return relu
20 | 
21 | # a fully-connected module containing a linear layer, an optional BN layer and a ReLU activation
22 | class fully_connected(nn.Module):
23 |     def __init__(self, inp_dim, out_dim, with_bn=True):
24 |         super(fully_connected, self).__init__()
25 |         self.with_bn = with_bn
26 | 
27 |         self.linear = nn.Linear(inp_dim, out_dim)
28 |         if self.with_bn:
29 |             self.bn = nn.BatchNorm1d(out_dim)
30 |         self.relu = nn.ReLU(inplace=True)
31 | 
32 |     def forward(self, x):
33 |         linear = self.linear(x)
34 |         bn = self.bn(linear) if self.with_bn else linear
35 |         relu = self.relu(bn)
36 |         return relu
37 | 
38 | # a residual module with two branches: the main branch contains two convolution layers, while
39 | # the skip branch contains a 1x1 convolution whenever the main branch's stride is not 1 or the
40 | # input channel count is not equal to the output channel count
41 | class residual(nn.Module):
42 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
43 |         super(residual, self).__init__()
44 | 
45 |         self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
46 |         self.bn1 = nn.BatchNorm2d(out_dim)
47 |         self.relu1 = nn.ReLU(inplace=True)
48 | 
49 |         self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
50 |         self.bn2 = nn.BatchNorm2d(out_dim)
51 | 
52 |         self.skip = nn.Sequential(
53 |             nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
54 |             nn.BatchNorm2d(out_dim)
55 |         ) if stride != 1 or inp_dim != out_dim else nn.Sequential()
56 |         self.relu = nn.ReLU(inplace=True)
57 | 
58 |     def forward(self, x):
59 |         conv1 = self.conv1(x)
60 |         bn1 = self.bn1(conv1)
61 |         relu1 = self.relu1(bn1)
62 | 
63 |         conv2 = self.conv2(relu1)
64 |         bn2 = self.bn2(conv2)
65 | 
66 |         skip = self.skip(x)
67 |         return self.relu(bn2 + skip)
68 | 
69 | # stacks `modules` layers of the given type (the default is the convolution module above);
70 | # only the first layer may change the channel count, from inp_dim to out_dim
71 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
72 |     layers = [layer(k, inp_dim, out_dim, **kwargs)]
73 |     for _ in range(1, modules):
74 |         layers.append(layer(k, out_dim, out_dim, **kwargs))
75 |     return nn.Sequential(*layers)
76 | 
77 | # the reverse of `make_layer`: only the last layer may change the channel count
78 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
79 |     layers = []
80 |     for _ in range(modules - 1):
81 |         layers.append(layer(k, inp_dim, inp_dim, **kwargs))
82 |     layers.append(layer(k, inp_dim, out_dim, **kwargs))
83 |     return nn.Sequential(*layers)
84 | 
-------------------------------------------------------------------------------- /nnet/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/nnet/__init__.py
-------------------------------------------------------------------------------- /nnet/py_factory.py: --------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 | import torch
4 | import importlib
5 | import torch.nn as nn
6 | 
7 | from config import system_configs
8 | from models.py_utils.data_parallel import DataParallel
9 | 
10 | torch.manual_seed(317)
11 | 
12 | class Network(nn.Module):
13 |     def __init__(self, model, loss):
14 |         super(Network, self).__init__()
15 | 
16 |         self.model = model
17 |         self.loss = loss
18 | 
19 |     def forward(self, xs, ys, **kwargs):
20 |         preds = self.model(*xs, **kwargs)
21 |         loss_kp = self.loss(preds, ys, **kwargs)
22 |         return loss_kp
23 | 
24 | # for model backward compatibility
25 | # previously the model was wrapped by a DataParallel module
26 | class DummyModule(nn.Module):
27 |     def __init__(self, model):
28 |         super(DummyModule, self).__init__()
29 |         self.module = model
30 | 
31 |     def forward(self, *xs, **kwargs):
32 |         return self.module(*xs, **kwargs)
33 | 
34 | 
35 | class NetworkFactory(object):
36 |     def __init__(self, db):
37 |         super(NetworkFactory, self).__init__()
38 | 
39 |         module_file = "models.{}".format(system_configs.snapshot_name)
40 |         print("module_file: {}".format(module_file))
41 |         nnet_module = importlib.import_module(module_file)
42 | 
43 |         self.model = DummyModule(nnet_module.model(db))
44 |         self.loss = nnet_module.loss
45 |         self.network = Network(self.model, self.loss)
46 |         self.network = DataParallel(self.network, chunk_sizes=system_configs.chunk_sizes).cuda()
47 |         self.load_cropped_pretrained_model("cache/nnet/CenterNet-52/CenterNet-52_480000.pkl")
48 | 
49 |         total_params = 0
50 |         for params in self.model.parameters():
51 |             num_params = 1
52 |             for x in params.size():
53 |                 num_params *= x
54 |             total_params += num_params
55 |         print("total parameters: {}".format(total_params))
56 | 
57 |         # self.fix_layers() # fix kps and prelayer
58 | 
59 |         if system_configs.opt_algo == "adam":
60 |             self.optimizer = torch.optim.Adam(
61 |                 filter(lambda p: p.requires_grad, self.model.parameters())
62 | 
) 63 | elif system_configs.opt_algo == "sgd": 64 | self.optimizer = torch.optim.SGD( 65 | filter(lambda p: p.requires_grad, self.model.parameters()), 66 | lr=system_configs.learning_rate, 67 | momentum=0.9, weight_decay=0.0001 68 | ) 69 | else: 70 | raise ValueError("unknown optimizer") 71 | 72 | def cuda(self): 73 | self.model.cuda() 74 | 75 | def load_cropped_pretrained_model(self, params_file): 76 | x = torch.load(params_file) 77 | params = {'module.model.%s'%k: v for k, v in x.items() if 'heats' not in k} 78 | self.network.load_state_dict(params, strict=False) 79 | print("load the cropped weights from COCO successfully.") 80 | 81 | def fix_layers(self): 82 | for m, v in self.network.named_parameters(): 83 | if '.pre' in m or '.kps' in m: 84 | v.requires_grad = False 85 | 86 | def train_mode(self): 87 | self.network.train() 88 | 89 | def eval_mode(self): 90 | self.network.eval() 91 | 92 | def train(self, xs, ys, **kwargs): 93 | xs = [x for x in xs] 94 | ys = [y for y in ys] 95 | 96 | self.optimizer.zero_grad() 97 | loss_kp = self.network(xs, ys) 98 | loss = loss_kp[0] 99 | focal_loss = loss_kp[1] 100 | pull_loss = loss_kp[2] 101 | push_loss = loss_kp[3] 102 | regr_loss = loss_kp[4] 103 | loss = loss.mean() 104 | focal_loss = focal_loss.mean() 105 | pull_loss = pull_loss.mean() 106 | push_loss = push_loss.mean() 107 | regr_loss = regr_loss.mean() 108 | loss.backward() 109 | self.optimizer.step() 110 | return loss, focal_loss, pull_loss, push_loss, regr_loss 111 | 112 | def validate(self, xs, ys, **kwargs): 113 | with torch.no_grad(): 114 | xs = [x.cuda(non_blocking=True) for x in xs] 115 | ys = [y.cuda(non_blocking=True) for y in ys] 116 | 117 | loss_kp = self.network(xs, ys) 118 | loss = loss_kp[0] 119 | focal_loss = loss_kp[1] 120 | pull_loss = loss_kp[2] 121 | push_loss = loss_kp[3] 122 | regr_loss = loss_kp[4] 123 | loss = loss.mean() 124 | return loss 125 | 126 | def test(self, xs, **kwargs): 127 | with torch.no_grad(): 128 | xs = [x.cuda(non_blocking=True) for x in xs] 129 | return self.model(*xs, **kwargs) 130 | 131 | def set_lr(self, lr): 132 | print("setting learning rate to: {}".format(lr)) 133 | for param_group in self.optimizer.param_groups: 134 | param_group["lr"] = lr 135 | 136 | def load_pretrained_params(self, pretrained_model): 137 | print("loading from {}".format(pretrained_model)) 138 | with open(pretrained_model, "rb") as f: 139 | params = torch.load(f) 140 | self.model.load_state_dict(params) 141 | 142 | def load_params(self, iteration): 143 | cache_file = system_configs.snapshot_file.format(iteration) 144 | print("loading model from {}".format(cache_file)) 145 | with open(cache_file, "rb") as f: 146 | params = torch.load(f) 147 | self.model.load_state_dict(params) 148 | 149 | def save_params(self, iteration): 150 | cache_file = system_configs.snapshot_file.format(iteration) 151 | print("saving model to {}".format(cache_file)) 152 | with open(cache_file, "wb") as f: 153 | params = self.model.state_dict() 154 | torch.save(params, f) 155 | -------------------------------------------------------------------------------- /sample/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/sample/__init__.py -------------------------------------------------------------------------------- /sample/pedestrian.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy 
as np 4 | import torch 5 | import random 6 | import string 7 | 8 | from config import system_configs 9 | from utils import crop_image, normalize_, color_jittering_, lighting_ 10 | from .utils import random_crop, draw_gaussian, gaussian_radius 11 | 12 | 13 | def _full_image_crop(image, detections): 14 | detections = detections.copy() 15 | height, width = image.shape[0:2] 16 | 17 | max_hw = max(height, width) 18 | center = [height // 2, width // 2] 19 | size = [max_hw, max_hw] 20 | 21 | image, border, offset = crop_image(image, center, size) 22 | detections[:, 0:4:2] += border[2] 23 | detections[:, 1:4:2] += border[0] 24 | return image, detections 25 | 26 | 27 | def _resize_image(image, detections, size): 28 | detections = detections.copy() 29 | height, width = image.shape[0:2] 30 | new_height, new_width = size 31 | 32 | image = cv2.resize(image, (new_width, new_height)) 33 | 34 | height_ratio = new_height / height 35 | width_ratio = new_width / width 36 | detections[:, 0:4:2] *= width_ratio 37 | detections[:, 1:4:2] *= height_ratio 38 | return image, detections 39 | 40 | 41 | def _clip_detections(image, detections): 42 | detections = detections.copy() 43 | height, width = image.shape[0:2] 44 | 45 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 46 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 47 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 48 | ((detections[:, 3] - detections[:, 1]) > 0) 49 | detections = detections[keep_inds] 50 | return detections 51 | 52 | 53 | def kp_detection(db, k_ind, data_aug, debug): 54 | data_rng = system_configs.data_rng 55 | batch_size = system_configs.batch_size 56 | 57 | categories = db.configs["categories"] 58 | input_size = db.configs["input_size"] 59 | output_size = db.configs["output_sizes"][0] 60 | 61 | border = db.configs["border"] 62 | lighting = db.configs["lighting"] 63 | rand_crop = db.configs["rand_crop"] 64 | rand_color = db.configs["rand_color"] 65 | rand_scales = db.configs["rand_scales"] 66 | gaussian_bump = db.configs["gaussian_bump"] 67 | gaussian_iou = db.configs["gaussian_iou"] 68 | gaussian_rad = db.configs["gaussian_radius"] 69 | 70 | max_tag_len = 128 71 | 72 | # allocating memory 73 | images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) 74 | tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 75 | br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 76 | ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 77 | tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 78 | br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 79 | ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 80 | tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 81 | br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 82 | ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 83 | tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) 84 | tag_lens = np.zeros((batch_size,), dtype=np.int32) 85 | 86 | db_size = db.db_inds.size 87 | for b_ind in range(batch_size): 88 | if not debug and k_ind == 0: 89 | db.shuffle_inds() 90 | 91 | db_ind = db.db_inds[k_ind] 92 | k_ind = (k_ind + 1) % db_size 93 | 94 | # reading image 95 | image_file = db.image_file(db_ind) 96 | image = cv2.imread(image_file) 97 | 98 | # reading detections 99 | detections = db.detections(db_ind) 100 
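        # NOTE (layout inferred from the slicing below): each row of
        # `detections` is [xtl, ytl, xbr, ybr, category] in input-image pixel
        # coordinates; the crop / resize / flip steps that follow transform
        # the boxes in lockstep with the image.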
101 |         # cropping an image randomly
102 |         if not debug and rand_crop:
103 |             image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
104 |         else:
105 |             image, detections = _full_image_crop(image, detections)
106 | 
107 |         image, detections = _resize_image(image, detections, input_size)
108 |         detections = _clip_detections(image, detections)
109 | 
110 |         width_ratio = output_size[1] / input_size[1]
111 |         height_ratio = output_size[0] / input_size[0]
112 | 
113 |         # flipping an image randomly
114 |         if not debug and np.random.uniform() > 0.5:
115 |             image[:] = image[:, ::-1, :]
116 |             width = image.shape[1]
117 |             detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
118 | 
119 |         if not debug:
120 |             image = image.astype(np.float32) / 255.
121 |             if rand_color:
122 |                 color_jittering_(data_rng, image)
123 |             if lighting:
124 |                 lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
125 |             normalize_(image, db.mean, db.std)
126 |         images[b_ind] = image.transpose((2, 0, 1))
127 | 
128 |         for ind, detection in enumerate(detections):
129 |             category = int(detection[-1]) - 1
130 |             # category = 0
131 | 
132 |             xtl, ytl = detection[0], detection[1]
133 |             xbr, ybr = detection[2], detection[3]
134 |             xct, yct = (detection[2] + detection[0]) / 2., (detection[3] + detection[1]) / 2.
135 | 
136 |             fxtl = (xtl * width_ratio)
137 |             fytl = (ytl * height_ratio)
138 |             fxbr = (xbr * width_ratio)
139 |             fybr = (ybr * height_ratio)
140 |             fxct = (xct * width_ratio)
141 |             fyct = (yct * height_ratio)
142 | 
143 |             xtl = int(fxtl)
144 |             ytl = int(fytl)
145 |             xbr = int(fxbr)
146 |             ybr = int(fybr)
147 |             xct = int(fxct)
148 |             yct = int(fyct)
149 | 
150 |             if gaussian_bump:
151 |                 width = detection[2] - detection[0]
152 |                 height = detection[3] - detection[1]
153 | 
154 |                 width = math.ceil(width * width_ratio)
155 |                 height = math.ceil(height * height_ratio)
156 | 
157 |                 if gaussian_rad == -1:
158 |                     radius = gaussian_radius((height, width), gaussian_iou)
159 |                     radius = max(0, int(radius))
160 |                 else:
161 |                     radius = gaussian_rad
162 | 
163 |                 draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
164 |                 draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
165 |                 draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte=5)
166 | 
167 |             else:
168 |                 tl_heatmaps[b_ind, category, ytl, xtl] = 1
169 |                 br_heatmaps[b_ind, category, ybr, xbr] = 1
170 |                 ct_heatmaps[b_ind, category, yct, xct] = 1
171 | 
172 |             tag_ind = tag_lens[b_ind]
173 |             tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
174 |             br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
175 |             ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
176 |             tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
177 |             br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
178 |             ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
179 |             tag_lens[b_ind] += 1
180 | 
181 |     for b_ind in range(batch_size):
182 |         tag_len = tag_lens[b_ind]
183 |         tag_masks[b_ind, :tag_len] = 1
184 | 
185 |     images = torch.from_numpy(images)
186 |     tl_heatmaps = torch.from_numpy(tl_heatmaps)
187 |     br_heatmaps = torch.from_numpy(br_heatmaps)
188 |     ct_heatmaps = torch.from_numpy(ct_heatmaps)
189 |     tl_regrs = torch.from_numpy(tl_regrs)
190 |     br_regrs = torch.from_numpy(br_regrs)
191 |     ct_regrs = torch.from_numpy(ct_regrs)
192 |     tl_tags = torch.from_numpy(tl_tags)  # B x N, flattened indices (y * output_width + x) into the H*W output map
193 |     br_tags = torch.from_numpy(br_tags)
194 |     ct_tags = torch.from_numpy(ct_tags)
195 |     tag_masks = torch.from_numpy(tag_masks)  # marks the valid (non-padded) entries, convenient for batched loss computation
196 | 
197 |     return {
198 | 
"xs": [images, tl_tags, br_tags, ct_tags], 199 | "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs] 200 | }, k_ind 201 | 202 | 203 | def sample_data(db, k_ind, data_aug=True, debug=False): 204 | return globals()[system_configs.sampling_function](db, k_ind, data_aug, debug) 205 | -------------------------------------------------------------------------------- /sample/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def gaussian2D(shape, sigma=1): 5 | m, n = [(ss - 1.) / 2. for ss in shape] 6 | y, x = np.ogrid[-m:m+1,-n:n+1] 7 | 8 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 9 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 10 | return h 11 | 12 | def draw_gaussian(heatmap, center, radius, k=1, delte=6): 13 | diameter = 2 * radius + 1 14 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / delte) 15 | 16 | x, y = center 17 | 18 | height, width = heatmap.shape[0:2] 19 | 20 | left, right = min(x, radius), min(width - x, radius + 1) 21 | top, bottom = min(y, radius), min(height - y, radius + 1) 22 | 23 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 24 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 25 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 26 | 27 | def gaussian_radius(det_size, min_overlap): 28 | height, width = det_size 29 | 30 | a1 = 1 31 | b1 = (height + width) 32 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 33 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 34 | r1 = (b1 + sq1) / 2 35 | 36 | a2 = 4 37 | b2 = 2 * (height + width) 38 | c2 = (1 - min_overlap) * width * height 39 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 40 | r2 = (b2 + sq2) / 2 41 | 42 | a3 = 4 * min_overlap 43 | b3 = -2 * min_overlap * (height + width) 44 | c3 = (min_overlap - 1) * width * height 45 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 46 | r3 = (b3 + sq3) / 2 47 | return min(r1, r2, r3) 48 | 49 | def _get_border(border, size): 50 | i = 1 51 | while size - border // i <= border // i: 52 | i *= 2 53 | return border // i 54 | 55 | def random_crop(image, detections, random_scales, view_size, border=64): 56 | view_height, view_width = view_size 57 | image_height, image_width = image.shape[0:2] 58 | 59 | scale = np.random.choice(random_scales) 60 | height = int(view_height * scale) 61 | width = int(view_width * scale) 62 | 63 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype) 64 | 65 | w_border = _get_border(border, image_width) 66 | h_border = _get_border(border, image_height) 67 | 68 | ctx = np.random.randint(low=w_border, high=image_width - w_border) 69 | cty = np.random.randint(low=h_border, high=image_height - h_border) 70 | 71 | x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, image_width) 72 | y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, image_height) 73 | 74 | left_w, right_w = ctx - x0, x1 - ctx 75 | top_h, bottom_h = cty - y0, y1 - cty 76 | 77 | # crop image 78 | cropped_ctx, cropped_cty = width // 2, height // 2 79 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 80 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 81 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 82 | 83 | # crop detections 84 | cropped_detections = detections.copy() 85 | cropped_detections[:, 0:4:2] -= x0 86 | cropped_detections[:, 1:4:2] -= y0 87 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 88 | cropped_detections[:, 1:4:2] += 
cropped_cty - top_h 89 | 90 | return cropped_image, cropped_detections 91 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import pprint 5 | import argparse 6 | import importlib 7 | import numpy as np 8 | 9 | import matplotlib 10 | matplotlib.use("Agg") 11 | 12 | from config import system_configs 13 | from nnet.py_factory import NetworkFactory 14 | from db.datasets import datasets 15 | 16 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 17 | torch.backends.cudnn.benchmark = False 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description="Test CenterNet") 21 | parser.add_argument('--cfg_file', default='CenterNet-52', help='config file', type=str) 22 | parser.add_argument('--testiter', dest='testiter', help='test at iteration i', 23 | default=10000, type=int) 24 | parser.add_argument('--split', dest='split', help='which split to use', 25 | default='validation', type=str) 26 | parser.add_argument('--suffix', dest="suffix", default=None, type=str) 27 | parser.add_argument('--debug', action='store_true') 28 | 29 | args = parser.parse_args() 30 | return args 31 | 32 | def make_dirs(directories): 33 | for directory in directories: 34 | if not os.path.exists(directory): 35 | os.makedirs(directory) 36 | 37 | def test(db, split, testiter, debug=False, suffix=None): 38 | result_dir = system_configs.result_dir 39 | result_dir = os.path.join(result_dir, str(testiter), split) 40 | 41 | if suffix is not None: 42 | result_dir = os.path.join(result_dir, suffix) 43 | 44 | make_dirs([result_dir]) 45 | 46 | test_iter = system_configs.max_iter if testiter is None else testiter 47 | print("loading parameters at iteration: {}".format(testiter)) 48 | 49 | print("building network ...") 50 | nnet = NetworkFactory(db) 51 | print("loading parameters ...") 52 | nnet.load_params(test_iter) 53 | 54 | test_file = 'test.{}'.format(db.data) 55 | testing = importlib.import_module(test_file).testing 56 | 57 | nnet.cuda() 58 | nnet.eval_mode() 59 | testing(db, nnet, result_dir, debug=debug) 60 | 61 | 62 | if __name__ == '__main__': 63 | args = parse_args() 64 | if args.suffix is None: 65 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '.json') 66 | else: 67 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '-{}.json'.format(args.suffix)) 68 | print("cfg file: {}".format(cfg_file)) 69 | 70 | with open(cfg_file, "r") as f: 71 | configs = json.load(f) 72 | 73 | configs["system"]["snapshot_name"] = args.cfg_file 74 | system_configs.update_config(configs["system"]) 75 | 76 | train_split = system_configs.train_split 77 | val_split = system_configs.val_split 78 | 79 | split = { 80 | "training": train_split, 81 | "validation": val_split, 82 | }[args.split] 83 | 84 | print("loading all datasets ...") 85 | dataset = system_configs.dataset 86 | print("split: {}".format(split)) 87 | testing_db = datasets[dataset](configs["db"], split) 88 | 89 | print("system config...") 90 | pprint.pprint(system_configs.full) 91 | 92 | print("db config...") 93 | pprint.pprint(testing_db.configs) 94 | 95 | test(testing_db, args.split, args.testiter, args.debug, args.suffix) 96 | 97 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- /test/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/test/__init__.py
-------------------------------------------------------------------------------- /test/base.py: --------------------------------------------------------------------------------
1 | import json
2 | 
3 | from nnet.py_factory import NetworkFactory
4 | 
5 | class Base(object):
6 |     def __init__(self, db, nnet, func, model=None):
7 |         super(Base, self).__init__()
8 | 
9 |         self._db = db
10 |         self._nnet = nnet
11 |         self._func = func
12 | 
13 |         if model is not None:
14 |             self._nnet.load_pretrained_params(model)
15 | 
16 |         self._nnet.cuda()
17 |         self._nnet.eval_mode()
18 | 
19 |     def _inference(self, image, *args, **kwargs):
20 |         return self._func(self._db, self._nnet, image.copy(), *args, **kwargs)
21 | 
22 |     def __call__(self, image, *args, **kwargs):
23 |         categories = self._db.configs["categories"]
24 |         bboxes = self._inference(image, *args, **kwargs)
25 |         return {'pedestrian': bboxes[j] for j in range(1, categories + 1)}  # constant key: only correct because categories == 1 for pedestrian detection
26 | 
27 | def load_cfg(cfg_file):
28 |     with open(cfg_file, "r") as f:
29 |         cfg = json.load(f)
30 | 
31 |     cfg_sys = cfg["system"]
32 |     cfg_db = cfg["db"]
33 |     return cfg_sys, cfg_db
34 | 
35 | def load_nnet(cfg_sys):
36 |     return NetworkFactory(cfg_sys)
37 | 
-------------------------------------------------------------------------------- /test/centernet.py: --------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import pdb
4 | import json
5 | import copy
6 | import numpy as np
7 | import torch
8 | 
9 | from PIL import Image, ImageDraw, ImageFont
10 | import matplotlib.pyplot as plt
11 | import matplotlib
12 | 
13 | from tqdm import tqdm
14 | from config import system_configs
15 | from utils import crop_image, normalize_
16 | from external.nms import soft_nms, soft_nms_merge
17 | 
18 | colours = np.random.rand(80, 3)
19 | 
20 | 
21 | def _rescale_dets(detections, ratios, borders, sizes):
22 |     xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
23 |     xs /= ratios[:, 1][:, None, None]
24 |     ys /= ratios[:, 0][:, None, None]
25 |     xs -= borders[:, 2][:, None, None]
26 |     ys -= borders[:, 0][:, None, None]
27 |     tx_inds = xs[:, :, 0] <= -5
28 |     bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5
29 |     ty_inds = ys[:, :, 0] <= -5
30 |     by_inds = ys[:, :, 1] >= sizes[0, 0] + 5
31 | 
32 |     np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
33 |     np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)
34 |     detections[:, tx_inds[0, :], 4] = -1
35 |     detections[:, bx_inds[0, :], 4] = -1
36 |     detections[:, ty_inds[0, :], 4] = -1
37 |     detections[:, by_inds[0, :], 4] = -1
38 | 
39 | 
40 | def save_image(data, fn):
41 |     sizes = np.shape(data)
42 |     height = float(sizes[0])
43 |     width = float(sizes[1])
44 | 
45 |     fig = plt.figure()
46 |     fig.set_size_inches(width / height, 1, forward=False)
47 |     ax = plt.Axes(fig, [0., 0., 1., 1.])
48 |     ax.set_axis_off()
49 |     fig.add_axes(ax)
50 | 
51 |     ax.imshow(data)
52 |     plt.savefig(fn, dpi=height)
53 |     plt.close()
54 | 
55 | 
56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3):
57 |     detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel)
58 |     detections = detections.data.cpu().numpy()
59 |     center = center.data.cpu().numpy()
60 |     return detections, center
61 | 
62 | 
63 | def inference(db, nnet, image, decode_func=kp_decode):
64 |     K = db.configs["top_k"]
65 |     ae_threshold = db.configs["ae_threshold"]
66 |     nms_kernel = db.configs["nms_kernel"]
67 | 
68 |     scales = db.configs["test_scales"]
69 
| weight_exp = db.configs["weight_exp"] 70 | merge_bbox = db.configs["merge_bbox"] 71 | categories = db.configs["categories"] 72 | nms_threshold = db.configs["nms_threshold"] 73 | max_per_image = db.configs["max_per_image"] 74 | nms_algorithm = { 75 | "nms": 0, 76 | "linear_soft_nms": 1, 77 | "exp_soft_nms": 2 78 | }[db.configs["nms_algorithm"]] 79 | 80 | height, width = image.shape[0:2] 81 | detections, center_points = [], [] 82 | 83 | for scale in scales: 84 | new_height = int(height * scale) 85 | new_width = int(width * scale) 86 | new_center = np.array([new_height // 2, new_width // 2]) 87 | 88 | inp_height = new_height | 127 89 | inp_width = new_width | 127 90 | 91 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 92 | ratios = np.zeros((1, 2), dtype=np.float32) 93 | borders = np.zeros((1, 4), dtype=np.float32) 94 | sizes = np.zeros((1, 2), dtype=np.float32) 95 | 96 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 97 | height_ratio = out_height / inp_height 98 | width_ratio = out_width / inp_width 99 | 100 | resized_image = cv2.resize(image, (new_width, new_height)) 101 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 102 | 103 | resized_image = resized_image / 255. 104 | normalize_(resized_image, db.mean, db.std) 105 | 106 | images[0] = resized_image.transpose((2, 0, 1)) 107 | borders[0] = border 108 | sizes[0] = [int(height * scale), int(width * scale)] 109 | ratios[0] = [height_ratio, width_ratio] 110 | 111 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 112 | images = torch.from_numpy(images) 113 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) 114 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses 115 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores 116 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip 117 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip 118 | dets = dets.reshape(1, -1, 8) 119 | center = center.reshape(1, -1, 4) 120 | 121 | _rescale_dets(dets, ratios, borders, sizes) 122 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image 123 | center[..., [1]] /= ratios[:, 0][:, None, None] 124 | center[..., [0]] -= borders[:, 2][:, None, None] 125 | center[..., [1]] -= borders[:, 0][:, None, None] 126 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) 127 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) 128 | dets[:, :, 0:4] /= scale 129 | center[:, :, 0:2] /= scale # remap to origin image 130 | 131 | if scale == 1: 132 | center_points.append(center) 133 | detections.append(dets) 134 | 135 | detections = np.concatenate(detections, axis=1) 136 | center_points = np.concatenate(center_points, axis=1) 137 | 138 | classes = detections[..., -1] 139 | classes = classes[0] 140 | detections = detections[0] 141 | center_points = center_points[0] 142 | 143 | valid_ind = detections[:, 4] > -1 144 | valid_detections = detections[valid_ind] 145 | 146 | box_width = valid_detections[:, 2] - valid_detections[:, 0] 147 | box_height = valid_detections[:, 3] - valid_detections[:, 1] 148 | 149 | s_ind = (box_width * box_height <= 22500) 150 | l_ind = (box_width * box_height > 22500) 151 | 152 | s_detections = valid_detections[s_ind] 153 | l_detections = valid_detections[l_ind] 154 | # trisection 155 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3 156 | 
s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3 157 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 158 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 159 | 160 | s_temp_score = copy.copy(s_detections[:, 4]) 161 | s_detections[:, 4] = -1 162 | 163 | center_x = center_points[:, 0][:, np.newaxis] 164 | center_y = center_points[:, 1][:, np.newaxis] 165 | s_left_x = s_left_x[np.newaxis, :] 166 | s_right_x = s_right_x[np.newaxis, :] 167 | s_top_y = s_top_y[np.newaxis, :] 168 | s_bottom_y = s_bottom_y[np.newaxis, :] 169 | # located in center region 170 | ind_lx = (center_x - s_left_x) > 0 171 | ind_rx = (center_x - s_right_x) < 0 172 | ind_ty = (center_y - s_top_y) > 0 173 | ind_by = (center_y - s_bottom_y) < 0 174 | # same classes 175 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 176 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 177 | index_s_new_score = np.argmax( 178 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], 179 | axis=0) # select the box having center located in the center region 180 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3 181 | 182 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 183 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 184 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 185 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 186 | 187 | l_temp_score = copy.copy(l_detections[:, 4]) 188 | l_detections[:, 4] = -1 189 | 190 | center_x = center_points[:, 0][:, np.newaxis] 191 | center_y = center_points[:, 1][:, np.newaxis] 192 | l_left_x = l_left_x[np.newaxis, :] 193 | l_right_x = l_right_x[np.newaxis, :] 194 | l_top_y = l_top_y[np.newaxis, :] 195 | l_bottom_y = l_bottom_y[np.newaxis, :] 196 | 197 | ind_lx = (center_x - l_left_x) > 0 198 | ind_rx = (center_x - l_right_x) < 0 199 | ind_ty = (center_y - l_top_y) > 0 200 | ind_by = (center_y - l_bottom_y) < 0 201 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 202 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 203 | index_l_new_score = np.argmax( 204 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) 205 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3 206 | 207 | detections = np.concatenate([l_detections, s_detections], axis=0) 208 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores 209 | classes = detections[..., -1] 210 | 211 | # reject detections with negative scores 212 | keep_inds = (detections[:, 4] > -1) 213 | detections = detections[keep_inds] 214 | classes = classes[keep_inds] 215 | 216 | # soft_nms 217 | top_bboxes = {} 218 | for j in range(categories): 219 | keep_inds = (classes == j) 220 | top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 221 | if merge_bbox: 222 | soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) 223 | else: 224 | soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm) 225 | top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5] 226 | 227 | scores = np.hstack([top_bboxes[j][:, -1] for j in 
range(1, categories + 1)]) 228 | # select boxes 229 | if len(scores) > max_per_image: 230 | kth = len(scores) - max_per_image 231 | thresh = np.partition(scores, kth)[kth] 232 | for j in range(1, categories + 1): 233 | keep_inds = (top_bboxes[j][:, -1] >= thresh) 234 | top_bboxes[j] = top_bboxes[j][keep_inds] 235 | 236 | return top_bboxes -------------------------------------------------------------------------------- /test/detector.py: -------------------------------------------------------------------------------- 1 | from .base import Base, load_cfg, load_nnet 2 | from config import system_configs 3 | from db.datasets import datasets 4 | import pkg_resources 5 | import importlib 6 | import os 7 | 8 | _package_name = __name__ 9 | 10 | 11 | def get_file_path(*paths): 12 | path = "/".join(paths) 13 | return pkg_resources.resource_filename(_package_name, path) 14 | 15 | 16 | class CenterNet(Base): 17 | def __init__(self, cfg_file, iter=10000, suffix=None): 18 | from test.centernet import inference 19 | 20 | model = importlib.import_module('models.%s'%cfg_file).model 21 | if suffix is None: 22 | cfg_path = os.path.join(system_configs.config_dir, "%s.json" % cfg_file) 23 | else: 24 | cfg_path = os.path.join(system_configs.config_dir, "%s-%s.json" % (cfg_file, suffix)) 25 | model_path = get_file_path("..", "cache", "nnet", cfg_file, "%s_%d.pkl" % (cfg_file, iter)) 26 | cfg_sys, cfg_db = load_cfg(cfg_path) 27 | cfg_sys["snapshot_name"] = cfg_file 28 | system_configs.update_config(cfg_sys) 29 | dataset = system_configs.dataset 30 | train_split = system_configs.train_split 31 | val_split = system_configs.val_split 32 | test_split = system_configs.test_split 33 | 34 | split = { 35 | "training": train_split, 36 | "validation": val_split, 37 | "testing": test_split 38 | }["validation"] 39 | 40 | demo = datasets[dataset](cfg_db, split) 41 | 42 | centernet = load_nnet(demo) 43 | super(CenterNet, self).__init__(demo, centernet, inference, model=model_path) 44 | 45 | 46 | -------------------------------------------------------------------------------- /test/pedestrian.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pdb 4 | import json 5 | import copy 6 | import numpy as np 7 | import torch 8 | 9 | from PIL import Image, ImageDraw, ImageFont 10 | import matplotlib.pyplot as plt 11 | import matplotlib 12 | 13 | from tqdm import tqdm 14 | from config import system_configs 15 | from utils import crop_image, normalize_ 16 | from external.nms import soft_nms, soft_nms_merge 17 | 18 | colours = np.random.rand(80, 3) 19 | 20 | 21 | def _rescale_dets(detections, ratios, borders, sizes): 22 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2] 23 | xs /= ratios[:, 1][:, None, None] 24 | ys /= ratios[:, 0][:, None, None] 25 | xs -= borders[:, 2][:, None, None] 26 | ys -= borders[:, 0][:, None, None] 27 | tx_inds = xs[:, :, 0] <= -5 28 | bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5 29 | ty_inds = ys[:, :, 0] <= -5 30 | by_inds = ys[:, :, 1] >= sizes[0, 0] + 5 31 | 32 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs) 33 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys) 34 | detections[:, tx_inds[0, :], 4] = -1 35 | detections[:, bx_inds[0, :], 4] = -1 36 | detections[:, ty_inds[0, :], 4] = -1 37 | detections[:, by_inds[0, :], 4] = -1 38 | 39 | 40 | def save_image(data, fn): 41 | sizes = np.shape(data) 42 | height = float(sizes[0]) 43 | width = float(sizes[1]) 44 | 45 | fig = plt.figure() 46 | fig.set_size_inches(width / height, 1, 
forward=False) 47 | ax = plt.Axes(fig, [0., 0., 1., 1.]) 48 | ax.set_axis_off() 49 | fig.add_axes(ax) 50 | 51 | ax.imshow(data) 52 | plt.savefig(fn, dpi=height) 53 | plt.close() 54 | 55 | 56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3): 57 | detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel) 58 | detections = detections.data.cpu().numpy() 59 | center = center.data.cpu().numpy() 60 | return detections, center 61 | 62 | 63 | def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode): 64 | debug_dir = os.path.join(result_dir, "debug") 65 | if not os.path.exists(debug_dir): 66 | os.makedirs(debug_dir) 67 | 68 | if db.split != "trainval": 69 | db_inds = db.db_inds[:100] if debug else db.db_inds 70 | else: 71 | db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] 72 | num_images = db_inds.size 73 | 74 | K = db.configs["top_k"] 75 | ae_threshold = db.configs["ae_threshold"] # group corners 76 | nms_kernel = db.configs["nms_kernel"] # nms for corners 77 | 78 | scales = db.configs["test_scales"] 79 | weight_exp = db.configs["weight_exp"] # for softnms 80 | merge_bbox = db.configs["merge_bbox"] 81 | categories = db.configs["categories"] 82 | nms_threshold = db.configs["nms_threshold"] 83 | max_per_image = db.configs["max_per_image"] 84 | nms_algorithm = { 85 | "nms": 0, 86 | "linear_soft_nms": 1, 87 | "exp_soft_nms": 2 88 | }[db.configs["nms_algorithm"]] 89 | 90 | top_bboxes = {} 91 | for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): 92 | db_ind = db_inds[ind] 93 | 94 | image_id = db.image_ids(db_ind) 95 | image_file = db.image_file(db_ind) 96 | image = cv2.imread(image_file) 97 | 98 | height, width = image.shape[0:2] 99 | 100 | detections = [] 101 | center_points = [] 102 | 103 | for scale in scales: 104 | new_height = int(height * scale) 105 | new_width = int(width * scale) 106 | new_center = np.array([new_height // 2, new_width // 2]) 107 | 108 | inp_height = new_height | 127 109 | inp_width = new_width | 127 110 | 111 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 112 | ratios = np.zeros((1, 2), dtype=np.float32) 113 | borders = np.zeros((1, 4), dtype=np.float32) 114 | sizes = np.zeros((1, 2), dtype=np.float32) 115 | 116 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 117 | height_ratio = out_height / inp_height 118 | width_ratio = out_width / inp_width 119 | 120 | resized_image = cv2.resize(image, (new_width, new_height)) 121 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 122 | 123 | resized_image = resized_image / 255. 
124 | normalize_(resized_image, db.mean, db.std) 125 | 126 | images[0] = resized_image.transpose((2, 0, 1)) 127 | borders[0] = border 128 | sizes[0] = [int(height * scale), int(width * scale)] 129 | ratios[0] = [height_ratio, width_ratio] 130 | 131 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 132 | images = torch.from_numpy(images) 133 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) 134 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses 135 | # here 2 cause the flip result 136 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores 137 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip 138 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip 139 | dets = dets.reshape(1, -1, 8) 140 | center = center.reshape(1, -1, 4) 141 | 142 | _rescale_dets(dets, ratios, borders, sizes) 143 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image 144 | center[..., [1]] /= ratios[:, 0][:, None, None] 145 | center[..., [0]] -= borders[:, 2][:, None, None] 146 | center[..., [1]] -= borders[:, 0][:, None, None] 147 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) 148 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) 149 | dets[:, :, 0:4] /= scale 150 | center[:, :, 0:2] /= scale # remap to origin image 151 | 152 | if scale == 1: 153 | center_points.append(center) 154 | detections.append(dets) 155 | 156 | detections = np.concatenate(detections, axis=1) 157 | center_points = np.concatenate(center_points, axis=1) 158 | 159 | classes = detections[..., -1] 160 | classes = classes[0] 161 | detections = detections[0] # N x 8 162 | center_points = center_points[0] # N x 4 163 | 164 | valid_ind = detections[:, 4] > -1 165 | valid_detections = detections[valid_ind] 166 | 167 | box_width = valid_detections[:, 2] - valid_detections[:, 0] 168 | box_height = valid_detections[:, 3] - valid_detections[:, 1] 169 | 170 | s_ind = (box_width * box_height <= 22500) 171 | l_ind = (box_width * box_height > 22500) 172 | 173 | s_detections = valid_detections[s_ind] 174 | l_detections = valid_detections[l_ind] 175 | # trisection 176 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3 177 | s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3 178 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 179 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 180 | 181 | s_temp_score = copy.copy(s_detections[:, 4]) 182 | s_detections[:, 4] = -1 183 | 184 | center_x = center_points[:, 0][:, np.newaxis] 185 | center_y = center_points[:, 1][:, np.newaxis] 186 | s_left_x = s_left_x[np.newaxis, :] 187 | s_right_x = s_right_x[np.newaxis, :] 188 | s_top_y = s_top_y[np.newaxis, :] 189 | s_bottom_y = s_bottom_y[np.newaxis, :] 190 | # located in center region 191 | ind_lx = (center_x - s_left_x) > 0 192 | ind_rx = (center_x - s_right_x) < 0 193 | ind_ty = (center_y - s_top_y) > 0 194 | ind_by = (center_y - s_bottom_y) < 0 195 | # same classes 196 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 197 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), 198 | axis=0) == 1 199 | index_s_new_score = np.argmax( 200 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], 201 | axis=0) # select the box having center located in the center region 
202 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[ 203 | index_s_new_score, 3]) / 3 204 | 205 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 206 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 207 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 208 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 209 | 210 | l_temp_score = copy.copy(l_detections[:, 4]) 211 | l_detections[:, 4] = -1 212 | 213 | center_x = center_points[:, 0][:, np.newaxis] 214 | center_y = center_points[:, 1][:, np.newaxis] 215 | l_left_x = l_left_x[np.newaxis, :] 216 | l_right_x = l_right_x[np.newaxis, :] 217 | l_top_y = l_top_y[np.newaxis, :] 218 | l_bottom_y = l_bottom_y[np.newaxis, :] 219 | 220 | ind_lx = (center_x - l_left_x) > 0 221 | ind_rx = (center_x - l_right_x) < 0 222 | ind_ty = (center_y - l_top_y) > 0 223 | ind_by = (center_y - l_bottom_y) < 0 224 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 225 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), 226 | axis=0) == 1 227 | index_l_new_score = np.argmax( 228 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) 229 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[ 230 | index_l_new_score, 3]) / 3 231 | 232 | detections = np.concatenate([l_detections, s_detections], axis=0) 233 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores 234 | classes = detections[..., -1] 235 | 236 | # for i in range(detections.shape[0]): 237 | # box_width = detections[i,2]-detections[i,0] 238 | # box_height = detections[i,3]-detections[i,1] 239 | # if box_width*box_height<=22500 and detections[i,4]!=-1: 240 | # left_x = (2*detections[i,0]+1*detections[i,2])/3 241 | # right_x = (1*detections[i,0]+2*detections[i,2])/3 242 | # top_y = (2*detections[i,1]+1*detections[i,3])/3 243 | # bottom_y = (1*detections[i,1]+2*detections[i,3])/3 244 | # temp_score = copy.copy(detections[i,4]) 245 | # detections[i,4] = -1 246 | # for j in range(center_points.shape[0]): 247 | # if (classes[i] == center_points[j,2])and \ 248 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ 249 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): 250 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3 251 | # break 252 | # elif box_width*box_height > 22500 and detections[i,4]!=-1: 253 | # left_x = (3*detections[i,0]+2*detections[i,2])/5 254 | # right_x = (2*detections[i,0]+3*detections[i,2])/5 255 | # top_y = (3*detections[i,1]+2*detections[i,3])/5 256 | # bottom_y = (2*detections[i,1]+3*detections[i,3])/5 257 | # temp_score = copy.copy(detections[i,4]) 258 | # detections[i,4] = -1 259 | # for j in range(center_points.shape[0]): 260 | # if (classes[i] == center_points[j,2])and \ 261 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ 262 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): 263 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3 264 | # break 265 | 266 | # reject detections with negative scores 267 | keep_inds = (detections[:, 4] > -1) 268 | detections = detections[keep_inds] 269 | classes = classes[keep_inds] 270 | 271 | # soft_nms 272 | top_bboxes[image_id] = {} 273 | for j in range(categories): 274 | keep_inds = (classes == j) 275 | top_bboxes[image_id][j + 1] = 
detections[keep_inds][:, 0:7].astype(np.float32) 276 | if merge_bbox: 277 | soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, 278 | weight_exp=weight_exp) 279 | else: 280 | soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm) 281 | top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] # N x 5 282 | 283 | scores = np.hstack([ 284 | top_bboxes[image_id][j][:, -1] 285 | for j in range(1, categories + 1) 286 | ]) 287 | 288 | # top_box is a dict {each image result is still a dict contains results in each class} 289 | # select boxes 290 | if len(scores) > max_per_image: 291 | kth = len(scores) - max_per_image 292 | thresh = np.partition(scores, kth)[kth] 293 | for j in range(1, categories + 1): 294 | keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh) 295 | top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds] 296 | 297 | if debug: 298 | image_file = db.image_file(db_ind) 299 | image = cv2.imread(image_file) 300 | im = image[:, :, (2, 1, 0)] 301 | fig, ax = plt.subplots(figsize=(12, 12)) 302 | fig = ax.imshow(im, aspect='equal') 303 | plt.axis('off') 304 | fig.axes.get_xaxis().set_visible(False) 305 | fig.axes.get_yaxis().set_visible(False) 306 | # bboxes = {} 307 | for j in range(1, categories + 1): 308 | keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4) 309 | cat_name = db.class_name(j) 310 | for bbox in top_bboxes[image_id][j][keep_inds]: 311 | bbox = bbox[0:4].astype(np.int32) 312 | xmin = bbox[0] 313 | ymin = bbox[1] 314 | xmax = bbox[2] 315 | ymax = bbox[3] 316 | # if (xmax - xmin) * (ymax - ymin) > 5184: 317 | ax.add_patch( 318 | plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=colours[j - 1], 319 | linewidth=4.0)) 320 | ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name), 321 | bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5), 322 | fontsize=15, color='white', weight='bold') 323 | 324 | debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind)) 325 | debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind)) 326 | plt.savefig(debug_file1) 327 | plt.savefig(debug_file2) 328 | plt.close() 329 | # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 330 | 331 | result_json = os.path.join(result_dir, "results.json") 332 | detections = db.convert_to_dict(top_bboxes) 333 | # with open(result_json, "w") as f: 334 | # json.dump(detections, f) 335 | 336 | db.evaluate(detections) 337 | return 0 338 | 339 | 340 | def testing(db, nnet, result_dir, debug=False): 341 | return globals()[system_configs.sampling_function](db, nnet, result_dir, debug=debug) 342 | -------------------------------------------------------------------------------- /test/vis_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def draw_bboxes(image, bboxes, font_size=0.5, thresh=0.5, colors=None): 5 | """Draws bounding boxes on an image. 6 | 7 | Args: 8 | image: An image in OpenCV format 9 | bboxes: A dictionary representing bounding boxes of different object 10 | categories, where the keys are the names of the categories and the 11 | values are the bounding boxes. The bounding boxes of category should be 12 | stored in a 2D NumPy array, where each row is a bounding box (x1, y1, 13 | x2, y2, score). 14 | font_size: (Optional) Font size of the category names. 15 | thresh: (Optional) Only bounding boxes with scores above the threshold 16 | will be drawn. 
17 | colors: (Optional) Color of bounding boxes for each category. If it is 18 | not provided, this function will use random color for each category. 19 | 20 | Returns: 21 | An image with bounding boxes. 22 | """ 23 | 24 | image = image.copy() 25 | for cat_name in bboxes: 26 | keep_inds = bboxes[cat_name][:, -1] > thresh 27 | cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 2)[0] 28 | 29 | if colors is None: 30 | color = np.random.random((3, )) * 0.6 + 0.4 31 | color = (color * 255).astype(np.int32).tolist() 32 | else: 33 | color = colors[cat_name] 34 | 35 | for bbox in bboxes[cat_name][keep_inds]: 36 | bbox = bbox[0:4].astype(np.int32) 37 | if bbox[1] - cat_size[1] - 2 < 0: 38 | cv2.rectangle(image, 39 | (bbox[0], bbox[1] + 2), 40 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 41 | color, -1 42 | ) 43 | cv2.putText(image, cat_name, 44 | (bbox[0], bbox[1] + cat_size[1] + 2), 45 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 46 | ) 47 | else: 48 | cv2.rectangle(image, 49 | (bbox[0], bbox[1] - cat_size[1] - 2), 50 | (bbox[0] + cat_size[0], bbox[1] - 2), 51 | color, -1 52 | ) 53 | cv2.putText(image, cat_name, 54 | (bbox[0], bbox[1] - 2), 55 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 56 | ) 57 | cv2.rectangle(image, 58 | (bbox[0], bbox[1]), 59 | (bbox[2], bbox[3]), 60 | color, 2 61 | ) 62 | return image 63 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | import os 3 | import json 4 | import torch 5 | import queue 6 | import pprint 7 | import argparse 8 | import importlib 9 | import threading 10 | import traceback 11 | import numpy as np 12 | 13 | from tqdm import tqdm 14 | from utils import stdout_to_tqdm 15 | from db.datasets import datasets 16 | from config import system_configs 17 | from nnet.py_factory import NetworkFactory 18 | from torch.multiprocessing import Process, Queue 19 | 20 | os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1' 21 | torch.backends.cudnn.enabled = True 22 | torch.backends.cudnn.benchmark = True 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description="Train CenterNet") 27 | parser.add_argument("--cfg_file", default='CenterNet-52', help="config file", type=str) 28 | parser.add_argument("--iter", dest="start_iter", 29 | help="train at iteration i", 30 | default=0, type=int) 31 | parser.add_argument("--threads", dest="threads", default=16, type=int) 32 | 33 | #args = parser.parse_args() 34 | args, unparsed = parser.parse_known_args() 35 | return args 36 | 37 | def prefetch_data(db, queue, sample_data, data_aug): 38 | ind = 0 39 | print("start prefetching data...") 40 | np.random.seed(os.getpid()) 41 | while True: 42 | try: 43 | data, ind = sample_data(db, ind, data_aug=data_aug) 44 | queue.put(data) 45 | except Exception as e: 46 | traceback.print_exc() 47 | raise e 48 | 49 | def pin_memory(data_queue, pinned_data_queue, sema): 50 | while True: 51 | data = data_queue.get() 52 | data["xs"] = [x.pin_memory() for x in data["xs"]] 53 | data["ys"] = [y.pin_memory() for y in data["ys"]] 54 | 55 | pinned_data_queue.put(data) 56 | if sema.acquire(blocking=False): 57 | return 58 | 59 | 60 | def init_parallel_jobs(dbs, queue, fn, data_aug): 61 | tasks = [Process(target=prefetch_data, args=(db, queue, fn, data_aug)) for db in dbs] 62 | for task in tasks: 63 | task.daemon = True 64 | task.start() 65 | return tasks 66 | 67 | 68 | def 
train(training_dbs, validation_db, start_iter=0): 69 | learning_rate = system_configs.learning_rate 70 | max_iteration = system_configs.max_iter 71 | pretrained_model = system_configs.pretrain 72 | snapshot = system_configs.snapshot 73 | val_iter = system_configs.val_iter 74 | display = system_configs.display 75 | decay_rate = system_configs.decay_rate 76 | stepsize = system_configs.stepsize 77 | 78 | training_size = len(training_dbs[0].db_inds) 79 | validation_size = len(validation_db.db_inds) 80 | 81 | # queues storing data for training 82 | training_queue = Queue(system_configs.prefetch_size) # buffer size of prefetch data 83 | validation_queue = Queue(5) 84 | 85 | # queues storing pinned data for training 86 | pinned_training_queue = queue.Queue(system_configs.prefetch_size) 87 | pinned_validation_queue = queue.Queue(5) 88 | 89 | # load data sampling function 90 | data_file = "sample.{}".format(training_dbs[0].data) 91 | sample_data = importlib.import_module(data_file).sample_data 92 | 93 | #allocate resources for parallel reading 94 | training_tasks = init_parallel_jobs(training_dbs, training_queue, sample_data, True) 95 | if val_iter: 96 | validation_tasks = init_parallel_jobs([validation_db], validation_queue, sample_data, False) 97 | 98 | training_pin_semaphore = threading.Semaphore() 99 | validation_pin_semaphore = threading.Semaphore() 100 | training_pin_semaphore.acquire() 101 | validation_pin_semaphore.acquire() 102 | 103 | training_pin_args = (training_queue, pinned_training_queue, training_pin_semaphore) 104 | training_pin_thread = threading.Thread(target=pin_memory, args=training_pin_args) 105 | training_pin_thread.daemon = True 106 | training_pin_thread.start() 107 | 108 | validation_pin_args = (validation_queue, pinned_validation_queue, validation_pin_semaphore) 109 | validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args) 110 | validation_pin_thread.daemon = True 111 | validation_pin_thread.start() 112 | 113 | print("building model...") 114 | nnet = NetworkFactory(training_dbs[0]) 115 | 116 | 117 | if pretrained_model is not None: 118 | if not os.path.exists(pretrained_model): 119 | raise ValueError("pretrained model does not exist") 120 | print("loading from pretrained model") 121 | nnet.load_pretrained_params(pretrained_model) 122 | 123 | if start_iter: 124 | learning_rate /= (decay_rate ** (start_iter // stepsize)) 125 | 126 | nnet.load_params(start_iter) 127 | nnet.set_lr(learning_rate) 128 | print("training starts from iteration {} with learning_rate {}".format(start_iter + 1, learning_rate)) 129 | else: 130 | nnet.set_lr(learning_rate) 131 | 132 | print("training start...") 133 | nnet.cuda() 134 | nnet.train_mode() 135 | 136 | with stdout_to_tqdm() as save_stdout: 137 | for iteration in tqdm(range(start_iter+1, max_iteration+1), file=save_stdout, ncols=80): 138 | training = pinned_training_queue.get(block=True) 139 | training_loss, focal_loss, pull_loss, push_loss, regr_loss = nnet.train(**training) 140 | 141 | if display and iteration % display == 0: 142 | print("training loss at iteration {}: {}".format(iteration, training_loss.item())) 143 | print("focal loss at iteration {}: {}".format(iteration, focal_loss.item())) 144 | print("pull loss at iteration {}: {}".format(iteration, pull_loss.item())) 145 | print("push loss at iteration {}: {}".format(iteration, push_loss.item())) 146 | print("regr loss at iteration {}: {}".format(iteration, regr_loss.item())) 147 | 148 | del training_loss, focal_loss, pull_loss, push_loss, regr_loss 149 
|
150 |             if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
151 |                 nnet.eval_mode()
152 |                 validation = pinned_validation_queue.get(block=True)
153 |                 validation_loss = nnet.validate(**validation)
154 |                 print("validation loss at iteration {}: {}".format(iteration, validation_loss.item()))
155 |                 nnet.train_mode()
156 | 
157 |             if iteration % snapshot == 0:
158 |                 nnet.save_params(iteration)
159 | 
160 |             if iteration % stepsize == 0:
161 |                 learning_rate /= decay_rate
162 |                 nnet.set_lr(learning_rate)
163 | 
164 |     # sending signal to kill the pin-memory threads
165 |     training_pin_semaphore.release()
166 |     validation_pin_semaphore.release()
167 | 
168 |     # terminating data fetching processes
169 |     for training_task in training_tasks:
170 |         training_task.terminate()
171 |     if val_iter:  # validation_tasks only exists when validation is enabled, so guard against a NameError
172 |         for validation_task in validation_tasks:
173 |             validation_task.terminate()
174 | 
175 | 
176 | if __name__ == "__main__":
177 |     args = parse_args()
178 |     cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + ".json")
179 |     with open(cfg_file, "r") as f:
180 |         configs = json.load(f)
181 | 
182 |     configs["system"]["snapshot_name"] = args.cfg_file
183 |     system_configs.update_config(configs["system"])
184 | 
185 |     train_split = system_configs.train_split
186 |     val_split = system_configs.val_split
187 |     print("loading all datasets ...")
188 |     dataset = system_configs.dataset
189 |     threads = args.threads
190 | 
191 |     print("using {} threads".format(threads))
192 |     training_dbs = [datasets[dataset](configs["db"], train_split) for _ in range(threads)]
193 |     validation_db = datasets[dataset](configs["db"], val_split)
194 | 
195 |     print("system config ...")
196 |     pprint.pprint(system_configs.full)
197 | 
198 |     print("db config...")
199 |     pprint.pprint(training_dbs[0].configs)
200 | 
201 |     print("len of db: {}".format(len(training_dbs[0].db_inds)))
202 |     train(training_dbs, validation_db, args.start_iter)
203 | 
204 | 
-------------------------------------------------------------------------------- /utils/__init__.py: --------------------------------------------------------------------------------
1 | from .tqdm import stdout_to_tqdm
2 | 
3 | from .image import crop_image
4 | from .image import color_jittering_, lighting_, normalize_
5 | 
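These re-exports are the public face of `utils`, and together they form the photometric half of the training pipeline defined in `utils/image.py` below. A sketch of a typical per-image chain follows; the mean/std and PCA lighting eigen values are illustrative stand-ins for the statistics the dataset object actually carries:

```python
# Sketch: training-time photometric augmentation with the helpers re-exported above.
import cv2
import numpy as np

rng = np.random.RandomState(123)
image = cv2.imread("data/demo/000001.jpg").astype(np.float32) / 255.0

mean = np.array([0.408, 0.447, 0.470], dtype=np.float32)   # assumed BGR statistics
std = np.array([0.289, 0.274, 0.278], dtype=np.float32)
eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
eig_vec = np.array([[-0.58752847, -0.69563484, 0.41340352],
                    [-0.5832747, 0.00994535, -0.81221408],
                    [-0.56089297, 0.71832671, 0.41158938]], dtype=np.float32)

color_jittering_(rng, image)                   # in-place brightness/contrast/saturation
lighting_(rng, image, 0.1, eig_val, eig_vec)   # PCA lighting noise
normalize_(image, mean, std)                   # zero-mean, unit-variance per channel
```

All three helpers mutate the float image in place, which is why the sampler converts to `float32` before calling them.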
-------------------------------------------------------------------------------- /utils/image.py: --------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import random
4 | 
5 | def grayscale(image):
6 |     return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
7 | 
8 | def normalize_(image, mean, std):
9 |     image -= mean
10 |     image /= std
11 | 
12 | def lighting_(data_rng, image, alphastd, eigval, eigvec):
13 |     alpha = data_rng.normal(scale=alphastd, size=(3, ))
14 |     image += np.dot(eigvec, eigval * alpha)
15 | 
16 | def blend_(alpha, image1, image2):
17 |     image1 *= alpha
18 |     image2 *= (1 - alpha)
19 |     image1 += image2
20 | 
21 | def saturation_(data_rng, image, gs, gs_mean, var):
22 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
23 |     blend_(alpha, image, gs[:, :, None])
24 | 
25 | def brightness_(data_rng, image, gs, gs_mean, var):
26 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
27 |     image *= alpha
28 | 
29 | def contrast_(data_rng, image, gs, gs_mean, var):
30 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
31 |     blend_(alpha, image, gs_mean)
32 | 
33 | def color_jittering_(data_rng, image):
34 |     functions = [brightness_, contrast_, saturation_]
35 |     random.shuffle(functions)
36 | 
37 |     gs = grayscale(image)
38 |     gs_mean = gs.mean()
39 |     for f in functions:
40 |         f(data_rng, image, gs, gs_mean, 0.4)
41 | 
42 | def crop_image(image, center, size):
43 |     cty, ctx = center
44 |     height, width = size
45 |     im_height, im_width = image.shape[0:2]
46 |     cropped_image = np.zeros((height, width, 3), dtype=image.dtype)
47 | 
48 |     x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width)
49 |     y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height)
50 | 
51 |     left, right = ctx - x0, x1 - ctx
52 |     top, bottom = cty - y0, y1 - cty
53 | 
54 |     cropped_cty, cropped_ctx = height // 2, width // 2
55 |     y_slice = slice(cropped_cty - top, cropped_cty + bottom)
56 |     x_slice = slice(cropped_ctx - left, cropped_ctx + right)
57 |     cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]
58 | 
59 |     border = np.array([
60 |         cropped_cty - top,
61 |         cropped_cty + bottom,
62 |         cropped_ctx - left,
63 |         cropped_ctx + right
64 |     ], dtype=np.float32)
65 | 
66 |     offset = np.array([
67 |         cty - height // 2,
68 |         ctx - width // 2
69 |     ])
70 | 
71 |     return cropped_image, border, offset
72 | 
-------------------------------------------------------------------------------- /utils/tqdm.py: --------------------------------------------------------------------------------
1 | import sys
2 | import contextlib
3 | 
4 | from tqdm import tqdm
5 | 
6 | class TqdmFile(object):
7 |     dummy_file = None
8 |     def __init__(self, dummy_file):
9 |         self.dummy_file = dummy_file
10 | 
11 |     def write(self, x):
12 |         if len(x.rstrip()) > 0:
13 |             tqdm.write(x, file=self.dummy_file)
14 | 
15 | @contextlib.contextmanager
16 | def stdout_to_tqdm():
17 |     save_stdout = sys.stdout
18 |     try:
19 |         sys.stdout = TqdmFile(sys.stdout)
20 |         yield save_stdout
21 |     except Exception as exc:
22 |         raise exc
23 |     finally:
24 |         sys.stdout = save_stdout
25 | 
--------------------------------------------------------------------------------
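`stdout_to_tqdm` is what lets `train.py` keep printing losses without corrupting the live progress bar: it swaps `sys.stdout` for a `TqdmFile` that routes writes through `tqdm.write`, while yielding the real stdout for the bar itself. A minimal usage sketch, mirroring the pattern in `train.py`:

```python
# Sketch: print() inside a tqdm loop without breaking the progress bar.
from tqdm import tqdm
from utils import stdout_to_tqdm

with stdout_to_tqdm() as save_stdout:
    for i in tqdm(range(100), file=save_stdout, ncols=80):
        if i % 25 == 0:
            print("checkpoint at step {}".format(i))  # routed through tqdm.write
```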