├── .idea
│   ├── CenterNetPerson.iml
│   └── modules.xml
├── README.md
├── conda_packagelist.txt
├── config.py
├── config
│   ├── CenterNet-52-multi_scale.json
│   └── CenterNet-52.json
├── data
│   └── demo
│       ├── 000001.jpg
│       └── 000003.jpg
├── db
│   ├── __init__.py
│   ├── base.py
│   ├── cityperson.py
│   ├── datasets.py
│   └── detection.py
├── demo.py
├── external
│   ├── .gitignore
│   ├── Makefile
│   ├── __init__.py
│   ├── nms.pyx
│   ├── nms.so
│   └── setup.py
├── models
│   ├── CenterNet-104.py
│   ├── CenterNet-52.py
│   ├── __init__.py
│   └── py_utils
│       ├── __init__.py
│       ├── _cpools
│       │   ├── .gitignore
│       │   ├── __init__.py
│       │   ├── setup.py
│       │   └── src
│       │       ├── bottom_pool.cpp
│       │       ├── left_pool.cpp
│       │       ├── right_pool.cpp
│       │       └── top_pool.cpp
│       ├── data_parallel.py
│       ├── kp.py
│       ├── kp_utils.py
│       ├── scatter_gather.py
│       └── utils.py
├── nnet
│   ├── __init__.py
│   └── py_factory.py
├── sample
│   ├── __init__.py
│   ├── pedestrian.py
│   └── utils.py
├── test.py
├── test
│   ├── __init__.py
│   ├── base.py
│   ├── centernet.py
│   ├── detector.py
│   ├── pedestrian.py
│   └── vis_utils.py
├── train.py
└── utils
    ├── __init__.py
    ├── image.py
    └── tqdm.py
/README.md:
--------------------------------------------------------------------------------
1 |
2 | ## Pedestrian detection based on CenterNet
3 |
4 | In this repo, we re-train [CenterNet](https://github.com/Duankaiwen/CenterNet) on the CityPerson dataset to obtain a pedestrian detector.
5 |
6 |
7 |
8 | ## Preparation
9 |
10 | Please first install [Anaconda](https://anaconda.org) and create an Anaconda environment using the provided package list.
11 | ```
12 | conda create --name CenterNet --file conda_packagelist.txt
13 | ```
14 |
15 | After you create the environment, activate it.
16 | ```
17 | source activate CenterNet
18 | ```
19 |
20 | ## Compiling Corner Pooling Layers
21 | ```
22 | cd models/py_utils/_cpools/
23 | python setup.py install --user
24 | ```
25 |
26 | ## Compiling NMS
27 | ```
28 | cd external
29 | make
30 | ```
31 |
32 | ## CityPerson dataset
33 |
34 | - Download the CityPerson images and annotation files from the Cityscapes site: [images](https://www.cityscapes-dataset.com/file-handling/?packageID=3), [labels](https://www.cityscapes-dataset.com/file-handling/?packageID=28)
35 | - Create a symlink in `data` pointing to your CityPerson directory (the expected layout is sketched after this file):
36 | ```
37 | ln -s /path/to/your/CityPerson data/
38 | ```
39 |
40 | ## Training and Evaluation
41 | To train CenterNet-52:
42 | ```
43 | python train.py --cfg_file CenterNet-52
44 | ```
45 | The default configuration in `config/CenterNet-52.json` assumes two 12 GB GPUs and a batch size of 12; adjust these values to match your hardware.
46 |
47 | To evaluate your detector:
48 | ```
49 | python test.py --cfg_file CenterNet-52 --testiter <checkpoint_iter>
50 | ```
51 |
52 | ## Demo
53 | The demo images are stored in `data/demo`:
54 | ```
55 | python demo.py
56 | ```
--------------------------------------------------------------------------------
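
The CityPerson loader (`db/cityperson.py`, below) resolves images and annotations relative to the symlinked directory. A sketch of the layout it assumes, reconstructed from the paths in its `_extract_data` method (`<city>` and `<stem>` are placeholders for the Cityscapes city folders and frame ids):

```
# CityPerson/
#   leftImg8bit/
#     train/<city>/<stem>_leftImg8bit.png           # images
#     val/<city>/<stem>_leftImg8bit.png
#   gtBboxCityPersons/
#     train/<city>/<stem>_gtBboxCityPersons.json    # per-image annotation files
#     val/<city>/<stem>_gtBboxCityPersons.json      # e.g. <city> = frankfurt, lindau, munster
```
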
/conda_packagelist.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: linux-64
4 | @EXPLICIT
5 | https://repo.continuum.io/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2
6 | https://repo.continuum.io/pkgs/main/linux-64/bzip2-1.0.6-h9a117a8_4.tar.bz2
7 | https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2018.4.16-0.tar.bz2
8 | https://conda.anaconda.org/caffe2/linux-64/caffe2-cuda8.0-cudnn7-0.8.dev-py36_2018.05.14.tar.bz2
9 | https://repo.continuum.io/pkgs/main/linux-64/cairo-1.14.12-h7636065_2.tar.bz2
10 | https://repo.continuum.io/pkgs/main/linux-64/certifi-2018.4.16-py36_0.tar.bz2
11 | https://repo.continuum.io/pkgs/main/linux-64/cffi-1.11.5-py36h9745a5d_0.tar.bz2
12 | https://repo.continuum.io/pkgs/free/linux-64/cudatoolkit-8.0-3.tar.bz2
13 | https://repo.continuum.io/pkgs/main/linux-64/cycler-0.10.0-py36h93f1223_0.tar.bz2
14 | https://repo.continuum.io/pkgs/main/linux-64/dbus-1.13.2-h714fa37_1.tar.bz2
15 | https://repo.continuum.io/pkgs/main/linux-64/expat-2.2.5-he0dffb1_0.tar.bz2
16 | https://repo.continuum.io/pkgs/main/linux-64/ffmpeg-3.4-h7264315_0.tar.bz2
17 | https://repo.continuum.io/pkgs/main/linux-64/fontconfig-2.12.6-h49f89f6_0.tar.bz2
18 | https://repo.continuum.io/pkgs/free/linux-64/freeglut-2.8.1-0.tar.bz2
19 | https://repo.continuum.io/pkgs/main/linux-64/freetype-2.8-hab7d2ae_1.tar.bz2
20 | https://repo.continuum.io/pkgs/free/linux-64/future-0.16.0-py36_1.tar.bz2
21 | https://repo.continuum.io/pkgs/main/linux-64/gflags-2.2.1-hf484d3e_0.tar.bz2
22 | https://repo.continuum.io/pkgs/main/linux-64/glib-2.56.1-h000015b_0.tar.bz2
23 | https://repo.continuum.io/pkgs/main/linux-64/glog-0.3.5-hf484d3e_1.tar.bz2
24 | https://repo.continuum.io/pkgs/main/linux-64/graphite2-1.3.11-hf63cedd_1.tar.bz2
25 | https://repo.continuum.io/pkgs/main/linux-64/gst-plugins-base-1.14.0-hbbd80ab_1.tar.bz2
26 | https://repo.continuum.io/pkgs/main/linux-64/gstreamer-1.14.0-hb453b48_1.tar.bz2
27 | https://repo.continuum.io/pkgs/main/linux-64/h5py-2.8.0-py36hca9c191_0.tar.bz2
28 | https://repo.continuum.io/pkgs/main/linux-64/harfbuzz-1.7.6-h5f0a787_1.tar.bz2
29 | https://repo.continuum.io/pkgs/main/linux-64/hdf5-1.8.18-h6792536_1.tar.bz2
30 | https://repo.continuum.io/pkgs/main/linux-64/icu-58.2-h9c2bf20_1.tar.bz2
31 | https://repo.continuum.io/pkgs/main/linux-64/intel-openmp-2018.0.0-8.tar.bz2
32 | https://repo.continuum.io/pkgs/main/linux-64/jasper-2.0.14-h07fcdf6_0.tar.bz2
33 | https://repo.continuum.io/pkgs/main/linux-64/cython-0.26.1-py36h21c49d0_0.tar.bz2
34 | https://repo.continuum.io/pkgs/main/linux-64/jpeg-9b-h024ee3a_2.tar.bz2
35 | https://repo.continuum.io/pkgs/main/linux-64/kiwisolver-1.0.1-py36h764f252_0.tar.bz2
36 | https://repo.continuum.io/pkgs/main/linux-64/libedit-3.1-heed3624_0.tar.bz2
37 | https://repo.continuum.io/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2
38 | https://repo.continuum.io/pkgs/main/linux-64/libgcc-ng-7.2.0-hdf63c60_3.tar.bz2
39 | https://repo.continuum.io/pkgs/main/linux-64/libgfortran-ng-7.2.0-hdf63c60_3.tar.bz2
40 | https://repo.continuum.io/pkgs/main/linux-64/libglu-9.0.0-h0c0bdc1_1.tar.bz2
41 | https://repo.continuum.io/pkgs/main/linux-64/libopus-1.2.1-hb9ed12e_0.tar.bz2
42 | https://repo.continuum.io/pkgs/main/linux-64/libpng-1.6.34-hb9fc6fc_0.tar.bz2
43 | https://repo.continuum.io/pkgs/main/linux-64/libprotobuf-3.5.2-h6f1eeef_0.tar.bz2
44 | https://repo.continuum.io/pkgs/main/linux-64/libstdcxx-ng-7.2.0-hdf63c60_3.tar.bz2
45 | https://repo.continuum.io/pkgs/main/linux-64/libtiff-4.0.9-h28f6b97_0.tar.bz2
46 | https://repo.continuum.io/pkgs/main/linux-64/libvpx-1.6.1-h888fd40_0.tar.bz2
47 | https://repo.continuum.io/pkgs/main/linux-64/libxcb-1.13-h1bed415_1.tar.bz2
48 | https://repo.continuum.io/pkgs/main/linux-64/libxml2-2.9.8-hf84eae3_0.tar.bz2
49 | https://repo.continuum.io/pkgs/main/linux-64/matplotlib-2.2.2-py36h0e671d2_1.tar.bz2
50 | https://repo.continuum.io/pkgs/main/linux-64/mkl-2018.0.2-1.tar.bz2
51 | https://repo.continuum.io/pkgs/main/linux-64/mkl_fft-1.0.1-py36h3010b51_0.tar.bz2
52 | https://repo.continuum.io/pkgs/main/linux-64/mkl_random-1.0.1-py36h629b387_0.tar.bz2
53 | https://repo.continuum.io/pkgs/main/linux-64/ncurses-6.0-h9df7e31_2.tar.bz2
54 | https://repo.continuum.io/pkgs/main/linux-64/ninja-1.8.2-py36h6bb024c_1.tar.bz2
55 | https://repo.continuum.io/pkgs/main/linux-64/numpy-1.14.3-py36hcd700cb_1.tar.bz2
56 | https://repo.continuum.io/pkgs/main/linux-64/numpy-base-1.14.3-py36h9be14a7_1.tar.bz2
57 | https://repo.continuum.io/pkgs/main/linux-64/olefile-0.45.1-py36_0.tar.bz2
58 | https://repo.continuum.io/pkgs/main/linux-64/opencv-3.3.1-py36h9248ab4_2.tar.bz2
59 | https://repo.continuum.io/pkgs/main/linux-64/openssl-1.0.2o-h20670df_0.tar.bz2
60 | https://repo.continuum.io/pkgs/main/linux-64/pcre-8.42-h439df22_0.tar.bz2
61 | https://repo.continuum.io/pkgs/main/linux-64/pillow-5.1.0-py36h3deb7b8_0.tar.bz2
62 | https://repo.continuum.io/pkgs/main/linux-64/pip-10.0.1-py36_0.tar.bz2
63 | https://repo.continuum.io/pkgs/main/linux-64/pixman-0.34.0-hceecf20_3.tar.bz2
64 | https://conda.anaconda.org/conda-forge/linux-64/protobuf-3.5.2-py36_0.tar.bz2
65 | https://repo.continuum.io/pkgs/main/linux-64/pycparser-2.18-py36hf9f622e_1.tar.bz2
66 | https://repo.continuum.io/pkgs/main/linux-64/pyparsing-2.2.0-py36hee85983_1.tar.bz2
67 | https://repo.continuum.io/pkgs/main/linux-64/pyqt-5.9.2-py36h751905a_0.tar.bz2
68 | https://repo.continuum.io/pkgs/main/linux-64/python-3.6.4-hc3d631a_3.tar.bz2
69 | https://repo.continuum.io/pkgs/main/linux-64/python-dateutil-2.7.2-py36_0.tar.bz2
70 | https://conda.anaconda.org/pytorch/linux-64/pytorch-0.4.1-py36_cuda8.0.61_cudnn7.1.2_1.tar.bz2
71 | https://repo.continuum.io/pkgs/main/linux-64/pytz-2018.4-py36_0.tar.bz2
72 | https://repo.continuum.io/pkgs/main/linux-64/pyyaml-3.12-py36hafb9ca4_1.tar.bz2
73 | https://repo.continuum.io/pkgs/main/linux-64/qt-5.9.5-h7e424d6_0.tar.bz2
74 | https://repo.continuum.io/pkgs/main/linux-64/readline-7.0-ha6073c6_4.tar.bz2
75 | https://repo.continuum.io/pkgs/main/linux-64/scikit-learn-0.19.1-py36h7aa7ec6_0.tar.bz2
76 | https://repo.continuum.io/pkgs/main/linux-64/scipy-1.1.0-py36hfc37229_0.tar.bz2
77 | https://repo.continuum.io/pkgs/main/linux-64/setuptools-39.1.0-py36_0.tar.bz2
78 | https://repo.continuum.io/pkgs/main/linux-64/sip-4.19.8-py36hf484d3e_0.tar.bz2
79 | https://repo.continuum.io/pkgs/main/linux-64/six-1.11.0-py36h372c433_1.tar.bz2
80 | https://repo.continuum.io/pkgs/main/linux-64/sqlite-3.23.1-he433501_0.tar.bz2
81 | https://repo.continuum.io/pkgs/main/linux-64/tk-8.6.7-hc745277_3.tar.bz2
82 | https://conda.anaconda.org/pytorch/linux-64/torchvision-0.2.1-py36_1.tar.bz2
83 | https://repo.continuum.io/pkgs/main/linux-64/tornado-5.0.2-py36_0.tar.bz2
84 | https://repo.continuum.io/pkgs/main/linux-64/tqdm-4.23.0-py36_0.tar.bz2
85 | https://repo.continuum.io/pkgs/main/linux-64/wheel-0.31.0-py36_0.tar.bz2
86 | https://repo.continuum.io/pkgs/main/linux-64/xz-5.2.3-h5e939de_4.tar.bz2
87 | https://repo.continuum.io/pkgs/main/linux-64/yaml-0.1.7-had09818_2.tar.bz2
88 | https://repo.continuum.io/pkgs/main/linux-64/zlib-1.2.11-ha838bed_2.tar.bz2
89 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | class Config:
5 | def __init__(self):
6 | self._configs = {}
7 | self._configs["dataset"] = None
8 | self._configs["sampling_function"] = "kp_detection"
9 |
10 | # Training Config
11 | self._configs["display"] = 50
12 | self._configs["snapshot"] = 5000
13 | self._configs["stepsize"] = 450000
14 | self._configs["learning_rate"] = 0.00025
15 | self._configs["decay_rate"] = 10
16 | self._configs["max_iter"] = 500000
17 | self._configs["val_iter"] = 100
18 | self._configs["batch_size"] = 1
19 | self._configs["snapshot_name"] = None
20 | self._configs["prefetch_size"] = 100
21 | self._configs["weight_decay"] = False
22 | self._configs["weight_decay_rate"] = 1e-5
23 | self._configs["weight_decay_type"] = "l2"
24 | self._configs["pretrain"] = None
25 | self._configs["opt_algo"] = "adam"
26 | self._configs["chunk_sizes"] = None
27 |
28 | # Directories
29 | self._configs["data_dir"] = "/data/zwzhou/Data/MOT17/MOT17Labels"
30 | self._configs["cache_dir"] = "cache"
31 | self._configs["config_dir"] = "config"
32 | self._configs["result_dir"] = "results"
33 |
34 | # Split
35 | self._configs["train_split"] = "trainval"
36 | self._configs["val_split"] = "minival"
37 | self._configs["test_split"] = "testdev"
38 |
39 | # Rng
40 | self._configs["data_rng"] = np.random.RandomState(123)
41 | self._configs["nnet_rng"] = np.random.RandomState(317)
42 |
43 |
44 | @property
45 | def chunk_sizes(self):
46 | return self._configs["chunk_sizes"]
47 |
48 | @property
49 | def train_split(self):
50 | return self._configs["train_split"]
51 |
52 | @property
53 | def val_split(self):
54 | return self._configs["val_split"]
55 |
56 | @property
57 | def test_split(self):
58 | return self._configs["test_split"]
59 |
60 | @property
61 | def full(self):
62 | return self._configs
63 |
64 | @property
65 | def sampling_function(self):
66 | return self._configs["sampling_function"]
67 |
68 | @property
69 | def data_rng(self):
70 | return self._configs["data_rng"]
71 |
72 | @property
73 | def nnet_rng(self):
74 | return self._configs["nnet_rng"]
75 |
76 | @property
77 | def opt_algo(self):
78 | return self._configs["opt_algo"]
79 |
80 | @property
81 | def weight_decay_type(self):
82 | return self._configs["weight_decay_type"]
83 |
84 | @property
85 | def prefetch_size(self):
86 | return self._configs["prefetch_size"]
87 |
88 | @property
89 | def pretrain(self):
90 | return self._configs["pretrain"]
91 |
92 | @property
93 | def weight_decay_rate(self):
94 | return self._configs["weight_decay_rate"]
95 |
96 | @property
97 | def weight_decay(self):
98 | return self._configs["weight_decay"]
99 |
100 | @property
101 | def result_dir(self):
102 | result_dir = os.path.join(self._configs["result_dir"], self.snapshot_name)
103 | if not os.path.exists(result_dir):
104 | os.makedirs(result_dir)
105 | return result_dir
106 |
107 | @property
108 | def dataset(self):
109 | return self._configs["dataset"]
110 |
111 | @property
112 | def snapshot_name(self):
113 | return self._configs["snapshot_name"]
114 |
115 | @property
116 | def snapshot_dir(self):
117 | snapshot_dir = os.path.join(self.cache_dir, "nnet", self.snapshot_name)
118 |
119 | if not os.path.exists(snapshot_dir):
120 | os.makedirs(snapshot_dir)
121 |
122 | return snapshot_dir
123 |
124 | @property
125 | def snapshot_file(self):
126 | snapshot_file = os.path.join(self.snapshot_dir, self.snapshot_name + "_{}.pkl")
127 | return snapshot_file
128 |
129 | @property
130 | def config_dir(self):
131 | return self._configs["config_dir"]
132 |
133 | @property
134 | def batch_size(self):
135 | return self._configs["batch_size"]
136 |
137 | @property
138 | def max_iter(self):
139 | return self._configs["max_iter"]
140 |
141 | @property
142 | def learning_rate(self):
143 | return self._configs["learning_rate"]
144 |
145 | @property
146 | def decay_rate(self):
147 | return self._configs["decay_rate"]
148 |
149 | @property
150 | def stepsize(self):
151 | return self._configs["stepsize"]
152 |
153 | @property
154 | def snapshot(self):
155 | return self._configs["snapshot"]
156 |
157 | @property
158 | def display(self):
159 | return self._configs["display"]
160 |
161 | @property
162 | def val_iter(self):
163 | return self._configs["val_iter"]
164 |
165 | @property
166 | def data_dir(self):
167 | return self._configs["data_dir"]
168 |
169 | @property
170 | def cache_dir(self):
171 | if not os.path.exists(self._configs["cache_dir"]):
172 | os.makedirs(self._configs["cache_dir"])
173 | return self._configs["cache_dir"]
174 |
175 | def update_config(self, new):
176 | for key in new:
177 | if key in self._configs:
178 | self._configs[key] = new[key]
179 |
180 | system_configs = Config()
181 |
--------------------------------------------------------------------------------
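
A minimal sketch of how this global `Config` object is driven from one of the JSON files in `config/`, mirroring the pattern used in the `__main__` block of `db/cityperson.py` (the snapshot name is the config file's stem):

```
import json
from config import system_configs

with open("config/CenterNet-52.json", "r") as f:
    configs = json.load(f)

configs["system"]["snapshot_name"] = "CenterNet-52"  # used to build cache/result paths
system_configs.update_config(configs["system"])      # keys not already in _configs are ignored

print(system_configs.batch_size)                   # 12, from the "system" block
print(system_configs.snapshot_file.format(10000))  # cache/nnet/CenterNet-52/CenterNet-52_10000.pkl
                                                   # (accessing it creates the cache dirs)
```
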
/config/CenterNet-52-multi_scale.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "dataset": "MSCOCO",
4 | "batch_size": 24,
5 | "sampling_function": "kp_detection",
6 |
7 | "train_split": "trainval",
8 | "val_split": "minival",
9 |
10 | "learning_rate": 0.00025,
11 | "decay_rate": 10,
12 |
13 | "val_iter": 500,
14 |
15 | "opt_algo": "adam",
16 | "prefetch_size": 6,
17 |
18 | "max_iter": 480000,
19 | "stepsize": 450000,
20 | "snapshot": 5000,
21 |
22 | "chunk_sizes": [6,6,6,6],
23 |
24 | "data_dir": "./data"
25 | },
26 |
27 | "db": {
28 | "rand_scale_min": 0.6,
29 | "rand_scale_max": 1.4,
30 | "rand_scale_step": 0.1,
31 | "rand_scales": null,
32 |
33 | "rand_crop": true,
34 | "rand_color": true,
35 |
36 | "border": 128,
37 | "gaussian_bump": true,
38 |
39 | "input_size": [511, 511],
40 | "output_sizes": [[128, 128]],
41 |
42 | "test_scales": [0.6, 1, 1.2],
43 |
44 | "top_k": 70,
45 | "categories": 80,
46 | "kp_categories": 1,
47 | "ae_threshold": 0.5,
48 | "nms_threshold": 0.5,
49 |
50 | "merge_bbox": true,
51 | "weight_exp": 10,
52 |
53 | "max_per_image": 100
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/config/CenterNet-52.json:
--------------------------------------------------------------------------------
1 | {
2 | "system": {
3 | "dataset": "CityPerson",
4 | "batch_size": 12,
5 | "sampling_function": "kp_detection",
6 |
7 | "train_split": "trainval",
8 | "val_split": "minival",
9 |
10 | "learning_rate": 0.00025,
11 | "decay_rate": 10,
12 |
13 | "val_iter": 500,
14 |
15 | "opt_algo": "adam",
16 | "prefetch_size": 20,
17 |
18 | "max_iter": 400000,
19 | "stepsize": 350000,
20 | "snapshot": 10000,
21 |
22 | "chunk_sizes": [6, 6],
23 |
24 | "data_dir": "/data/zwzhou/Data/CityPerson"
25 | },
26 |
27 | "db": {
28 | "rand_scale_min": 0.6,
29 | "rand_scale_max": 1.4,
30 | "rand_scale_step": 0.1,
31 | "rand_scales": null,
32 |
33 | "rand_crop": true,
34 | "rand_color": true,
35 |
36 | "border": 128,
37 | "gaussian_bump": true,
38 |
39 | "input_size": [511, 511],
40 | "output_sizes": [[128, 128]],
41 |
42 | "test_scales": [1],
43 |
44 | "top_k": 70,
45 | "categories": 1,
46 | "kp_categories": 1,
47 | "ae_threshold": 0.5,
48 | "nms_threshold": 0.5,
49 |
50 | "max_per_image": 100,
51 | "pretrained_model": "cache/nnet/CenterNet-52/CenterNet-52_480000.pkl"
52 |
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
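
A note on `batch_size` versus `chunk_sizes` in these configs: the chunks appear to be the per-GPU sub-batches the data-parallel wrapper splits each batch into, and in both JSON files here they sum to the batch size (12 = 6 + 6 above; 24 = 6 + 6 + 6 + 6 in the multi-scale config). When changing the batch size for your hardware, as the README suggests, keep that invariant. A quick sanity check:

```
# Assumption drawn from the two configs in this repo: one chunk per visible GPU,
# with the chunks summing to the batch size.
batch_size = 12
chunk_sizes = [6, 6]
assert sum(chunk_sizes) == batch_size
```
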
/data/demo/000001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000001.jpg
--------------------------------------------------------------------------------
/data/demo/000003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000003.jpg
--------------------------------------------------------------------------------
/db/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/db/__init__.py
--------------------------------------------------------------------------------
/db/base.py:
--------------------------------------------------------------------------------
1 | import os
2 | import h5py
3 | import numpy as np
4 |
5 | from config import system_configs
6 |
7 | class BASE(object):
8 | def __init__(self):
9 | self._split = None
10 | self._db_inds = []
11 | self._image_ids = []
12 |
13 | self._data = None
14 | self._image_hdf5 = None
15 | self._image_file = None
16 | self._image_hdf5_file = None
17 |
18 | self._mean = np.zeros((3, ), dtype=np.float32)
19 | self._std = np.ones((3, ), dtype=np.float32)
20 | self._eig_val = np.ones((3, ), dtype=np.float32)
21 | self._eig_vec = np.zeros((3, 3), dtype=np.float32)
22 |
23 | self._configs = {}
24 | self._train_cfg = {}
25 | self._model = {}
26 | self._configs["data_aug"] = True
27 |
28 | self._data_rng = None
29 |
30 | @property
31 | def data(self):
32 | if self._data is None:
33 | raise ValueError("data is not set")
34 | return self._data
35 |
36 | @property
37 | def configs(self):
38 | return self._configs
39 |
40 | @property
41 | def train_cfg(self):
42 | return self._train_cfg
43 |
44 | @property
45 | def model(self):
46 | return self._model
47 |
48 | @property
49 | def mean(self):
50 | return self._mean
51 |
52 | @property
53 | def std(self):
54 | return self._std
55 |
56 | @property
57 | def eig_val(self):
58 | return self._eig_val
59 |
60 | @property
61 | def eig_vec(self):
62 | return self._eig_vec
63 |
64 | @property
65 | def db_inds(self):
66 | return self._db_inds
67 |
68 | @property
69 | def split(self):
70 | return self._split
71 |
72 | def update_config(self, new):
73 | for key in new:
74 | if key in self._configs:
75 | self._configs[key] = new[key]
76 |
77 | def image_ids(self, ind):
78 | return self._image_ids[ind]
79 |
80 | def image_file(self, ind):
81 | if self._image_file is None:
82 | raise ValueError("Image path is not initialized")
83 |
84 | image_id = self._image_ids[ind]
85 | return self._image_file.format(image_id)
86 |
87 | def write_result(self, ind, all_bboxes, all_scores):
88 | pass
89 |
90 | def evaluate(self, name):
91 | pass
92 |
93 | def shuffle_inds(self, quiet=False):
94 | if self._data_rng is None:
95 | self._data_rng = np.random.RandomState(os.getpid())
96 |
97 | if not quiet:
98 | print("shuffling indices...")
99 | rand_perm = self._data_rng.permutation(len(self._db_inds))
100 | self._db_inds = self._db_inds[rand_perm]
101 |
--------------------------------------------------------------------------------
/db/cityperson.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import pickle
4 | import numpy as np
5 | from tqdm import tqdm
6 | from config import system_configs
7 | from db.detection import DETECTION
8 |
9 |
10 | class CityPerson(DETECTION):
11 | def __init__(self, db_config, split):
12 | super(CityPerson, self).__init__(db_config)
13 |
14 | data_dir = system_configs.data_dir
15 | result_dir = system_configs.result_dir
16 | cache_dir = system_configs.cache_dir
17 |
18 | self._split = split
19 | self._dataset = {
20 | "trainval": "train",
21 | "minival": "val"
22 | }[self._split]
23 |
24 | self._image_dir = os.path.join(data_dir, "leftImg8bit")
25 |
26 | self._image_file = os.path.join(self._image_dir, "{}")
27 |
28 | self._anno_dir = os.path.join(data_dir, "gtBboxCityPersons")
29 |
30 | self._data = "pedestrian" # the sample function file
31 | self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32)
32 | self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32)
33 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
34 | self._eig_vec = np.array([
35 | [-0.58752847, -0.69563484, 0.41340352],
36 | [-0.5832747, 0.00994535, -0.81221408],
37 | [-0.56089297, 0.71832671, 0.41158938]
38 | ], dtype=np.float32)
39 |
40 | self._cache_file = os.path.join(cache_dir, "cityperson_{}.pkl".format(self._split))
41 | self._load_data()
42 | self._db_inds = np.arange(len(self._image_ids))
43 |
44 |
45 | def _load_data(self):
46 | print("loading from cache file: {}".format(self._cache_file))
47 | if not os.path.exists(self._cache_file):
48 | print("No cache file found...")
49 | self._extract_data()
50 | with open(self._cache_file, "wb") as f:
51 | pickle.dump([self._detections, self._image_ids], f)
52 | else:
53 | with open(self._cache_file, "rb") as f:
54 | self._detections, self._image_ids = pickle.load(f)
55 |
56 | def _extract_data(self):
57 | self._image_ids = []
58 | self._detections = {}
59 | subsets = os.listdir(os.path.join(self._anno_dir, self._dataset)) #["frankfurt", "lindau", "munster"]
60 | for ss in subsets:
61 | anno_dir = '{}/{}'.format(self._dataset, ss)
62 | for anno in os.listdir(os.path.join(self._anno_dir, anno_dir)):
63 | anno_file = os.path.join(self._anno_dir, '{}/{}'.format(anno_dir, anno))
64 | img_id = os.path.join(anno_dir, anno.replace("gtBboxCityPersons.json", "leftImg8bit.png"))
65 | self._image_ids.append(img_id)
66 | bboxes = []
67 | with open(anno_file, 'r') as f:
68 | anno_info = json.load(f)
69 | objs = anno_info["objects"]
70 |                 for obj in objs:
71 |                     if obj['label'] == 'pedestrian':
72 |                         bbox = obj['bbox']
73 |                         bboxVis = obj['bboxVis']
74 |                         if bboxVis[2] * bboxVis[3] * 1.0 / (bbox[2] * bbox[3]) > 0.4:  # visible area > 40% of the full box
75 |                             bbox = np.array(bbox)
76 |                             bbox[2:] += bbox[:2]
77 |                             bboxes.append(bbox.tolist())
78 | bboxes = np.array(bboxes, dtype=float)
79 | if bboxes.size == 0:
80 | self._detections[img_id] = np.zeros((0, 5))
81 | else:
82 | self._detections[img_id] = np.hstack((bboxes, np.ones((len(bboxes), 1))))
83 |
84 | def detections(self, ind):
85 | image_id = self._image_ids[ind]
86 | detections = self._detections[image_id]
87 | return detections.astype(float).copy()
88 |
89 |     def _to_float(self, x):
90 |         return float("{:.2f}".format(x))
91 |
92 | def convert_to_dict(self, all_boxes):
93 | scores, bboxes, img_ids, clses = [], [], [], []
94 | for img_id in all_boxes:
95 | for cls_id in all_boxes[img_id]:
96 | dets = all_boxes[img_id][cls_id]
97 | img_ids.extend([img_id] * len(dets))
98 | clses.extend([cls_id] * len(dets))
99 | scores.append(dets[:, -1])
100 | bboxes.append(dets[:, :-1])
101 | scores = np.concatenate(scores, axis=0)
102 | bboxes = np.concatenate(bboxes, axis=0)
103 | detections = {"image_ids": img_ids,
104 | "category_ids": clses,
105 | "bboxes": bboxes,
106 | "confidences": scores}
107 | return detections
108 |
109 |
110 |
111 | def evaluate(self, detections, ovthresh=0.5):
112 | image_ids = detections['image_ids']
113 | bboxes = detections['bboxes']
114 | confidences = detections["confidences"]
115 | category_ids= detections["category_ids"] # only one class in our results
116 |
117 | # pre and rec
118 | sorted_ind = np.argsort(-confidences)
119 | bboxes = bboxes[sorted_ind, :]
120 | image_ids = [image_ids[x] for x in sorted_ind]
121 | nd = len(sorted_ind)
122 | tp, fp = np.zeros(nd), np.zeros(nd)
123 |
124 | nps = 0
125 | R_dets = {}
126 | for id in image_ids:
127 | if id not in R_dets:
128 | R_dets[id] = np.zeros(len(self._detections[id]))
129 | nps += len(self._detections[id])
130 |
131 | for d in range(nd):
132 | R = self._detections[image_ids[d]]
133 | R_det = R_dets[image_ids[d]]
134 | bb = bboxes[d, :].astype(float)
135 | ovrmax = -np.inf
136 | BBGT = R[:, :4].astype(float)
137 |
138 | if BBGT.size > 0:
139 | xmin = np.maximum(BBGT[:, 0], bb[0])
140 | xmax = np.minimum(BBGT[:, 2], bb[2])
141 | ymin = np.maximum(BBGT[:, 1], bb[1])
142 | ymax = np.minimum(BBGT[:, 3], bb[3])
143 | w = np.maximum(xmax - xmin + 1, 0.)
144 |                 h = np.maximum(ymax - ymin + 1, 0.)
145 | inters = w * h # intersection
146 | unions = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \
147 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] -
148 | BBGT[:, 1] + 1.) - inters
149 |
150 | ious = inters / unions
151 | ovrmax = np.max(ious)
152 | jmax = np.argmax(ious)
153 | if ovrmax > ovthresh:
154 | if R_det[jmax] == 0:
155 | tp[d] = 1
156 | R_det[jmax] = 1
157 | else:
158 | fp[d] = 1
159 | else:
160 | fp[d] = 1
161 | fp = np.cumsum(fp)
162 | tp = np.cumsum(tp)
163 | rec = tp/float(nps)
164 | pre = tp/np.maximum(tp + fp, np.finfo(np.float64).eps)
165 |
166 | def voc_ap(rec, pre, use_07_metric=False):
167 | """Compute VOC AP given precision and recall.
168 | If use_07_metric is true, uses the VOC 07 11-point method (default: False)"""
169 | if use_07_metric:
170 | ap = 0.
171 | for t in np.arange(0., 1.1, 0.1):
172 | if np.sum(rec >= t) == 0:
173 | p = 0
174 | else:
175 | p = np.max(pre[rec >= t])
176 | ap = ap + p / 11.
177 | else:
178 | # first append sentinel values at the end
179 | mrec = np.concatenate(([0.], rec, [1.]))
180 | mpre = np.concatenate(([0.], pre, [0.]))
181 | # compute the precision,
182 | for i in range(mpre.size - 1, 0, -1):
183 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
184 | i = np.where(mrec[1:] != mrec[:-1])[0]
185 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
186 | return ap
187 | print("The final evaluated AP: {}".format(voc_ap(rec, pre)))
188 |
189 |
190 | if __name__=='__main__':
191 | import cv2
192 | os.chdir('../')
193 |
194 | cfg_file = os.path.join(system_configs.config_dir, 'CenterNet-52.json')
195 | with open(cfg_file, 'r') as f:
196 | configs = json.load(f)
197 |
198 | configs['system']['snapshot_name'] = 'CenterNet-52'
199 | system_configs.update_config(configs['system'])
200 |
201 | val_split = system_configs.val_split
202 | val_db = CityPerson(configs['db'], val_split)
203 |
204 | ind = 1
205 | img_file = val_db.image_file(ind)
206 | detections = val_db.detections(ind)
207 | img = cv2.imread(img_file)
208 |
209 | for d in detections:
210 | cv2.rectangle(img, (int(d[0]), int(d[1])), (int(d[2]), int(d[3])), color=(0, 0, 255))
211 |
212 |
213 | cv2.imshow('test', img)
214 | cv2.waitKey(0)
--------------------------------------------------------------------------------
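
For clarity, a hypothetical example of the dictionary that `convert_to_dict` produces and `evaluate` consumes; boxes are corner-format `[x1, y1, x2, y2]`, and the image id shown is a made-up `val` filename following the naming scheme above:

```
import numpy as np

# Illustrative only: two detections on one (hypothetical) validation image.
detections = {
    "image_ids":    ["val/frankfurt/frankfurt_000000_000294_leftImg8bit.png"] * 2,
    "category_ids": [1, 1],                      # single pedestrian class
    "bboxes":       np.array([[100., 200., 150., 320.],
                              [400., 180., 460., 330.]]),
    "confidences":  np.array([0.91, 0.42]),
}
# db.evaluate(detections, ovthresh=0.5) ranks boxes by confidence, greedily
# matches each to an unclaimed ground-truth box at IoU > 0.5, and prints the
# VOC-style AP over the whole split.
```
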
/db/datasets.py:
--------------------------------------------------------------------------------
1 | from db.cityperson import CityPerson
2 |
3 | datasets = {
4 | "CityPerson": CityPerson
5 | # "MCOCO": MCOCO
6 | }
--------------------------------------------------------------------------------
/db/detection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from db.base import BASE
3 |
4 |
5 | class DETECTION(BASE):
6 | def __init__(self, db_config):
7 | super(DETECTION, self).__init__()
8 |
9 | self._configs["categories"] = 2
10 | self._configs["kp_categories"] = 1
11 | self._configs["rand_scales"] = [1]
12 | self._configs["rand_scale_min"] = 0.8
13 | self._configs["rand_scale_max"] = 1.4
14 | self._configs["rand_scale_step"] = 0.2
15 |
16 | self._configs["input_size"] = [511]
17 | self._configs["output_sizes"] = [[128, 128]]
18 |
19 | self._configs["nms_threshold"] = 0.5
20 | self._configs["max_per_image"] = 100
21 | self._configs["top_k"] = 100
22 | self._configs["ae_threshold"] = 0.5
23 | self._configs["nms_kernel"] = 3
24 |
25 | self._configs["nms_algorithm"] = "exp_soft_nms"
26 | self._configs["weight_exp"] = 8
27 | self._configs["merge_bbox"] = False
28 |
29 | self._configs["data_aug"] = True
30 | self._configs["lighting"] = True
31 |
32 | self._configs["border"] = 128
33 | self._configs["gaussian_bump"] = True
34 | self._configs["gaussian_iou"] = 0.7
35 | self._configs["gaussian_radius"] = -1
36 | self._configs["rand_crop"] = False
37 | self._configs["rand_color"] = False
38 | self._configs["rand_pushes"] = False
39 | self._configs["rand_samples"] = False
40 | self._configs["special_crop"] = False
41 |
42 | self._configs["test_scales"] = [1]
43 |
44 | self._train_cfg["rcnn"] = dict(
45 | assigner=dict(
46 | pos_iou_thr=0.5,
47 | neg_iou_thr=0.5,
48 | min_pos_iou=0.5,
49 | ignore_iof_thr=-1),
50 | sampler=dict(
51 | num=512,
52 | pos_fraction=0.25,
53 | neg_pos_ub=-1,
54 | add_gt_as_proposals=True,
55 | pos_balance_sampling=False,
56 | neg_balance_thr=0),
57 | mask_size=28,
58 | pos_weight=-1,
59 | debug=False)
60 |
61 | self._model['bbox_roi_extractor'] = dict(
62 | type='SingleRoIExtractor',
63 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
64 | out_channels=256,
65 | featmap_strides=[4])
66 |
67 | self._model['bbox_head'] = dict(
68 | type='SharedFCBBoxHead',
69 | num_fcs=2,
70 | in_channels=256,
71 | fc_out_channels=1024,
72 | roi_feat_size=7,
73 | num_classes=81,
74 | target_means=[0., 0., 0., 0.],
75 | target_stds=[0.1, 0.1, 0.2, 0.2],
76 | reg_class_agnostic=False)
77 |
78 | self.update_config(db_config)
79 |
80 | if self._configs["rand_scales"] is None:
81 | self._configs["rand_scales"] = np.arange(
82 | self._configs["rand_scale_min"],
83 | self._configs["rand_scale_max"],
84 | self._configs["rand_scale_step"]
85 | )
86 |
--------------------------------------------------------------------------------
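
A small illustration of the `rand_scales` fallback at the bottom of `DETECTION.__init__`: when the JSON leaves `rand_scales` as `null`, the scale set is generated with `np.arange`, which excludes its endpoint, so `rand_scale_max` itself is never sampled:

```
import numpy as np

# Defaults from config/CenterNet-52.json: min 0.6, max 1.4, step 0.1
scales = np.arange(0.6, 1.4, 0.1)
print(np.round(scales, 1))  # [0.6 0.7 0.8 0.9 1.  1.1 1.2 1.3] -- no 1.4
```
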
/demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import cv2
4 | import time
5 | from tqdm import tqdm
6 | from test.vis_utils import draw_bboxes
7 | from test.detector import CenterNet as Detector
8 | 
9 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
10 | detector = Detector("CenterNet-52", iter=10000)
11 | t0 = time.time()
12 | image_names = [img for img in os.listdir('data/demo') if img.endswith('.jpg')]
13 | os.makedirs('tmp_squeeze', exist_ok=True)  # output dir for annotated frames
14 | for i in tqdm(range(len(image_names))):
15 |     image = cv2.imread('data/demo/{}'.format(image_names[i]))
16 |     bboxes = detector(image)
17 |     image = draw_bboxes(image, bboxes)
18 |     cv2.imwrite("tmp_squeeze/{}.jpg".format(str(i).zfill(6)), image)
19 |     cv2.imshow('image', image)
20 |     cv2.waitKey(10)
21 | 
22 | t1 = time.time()
23 | print("speed: %f s/image" % ((t1 - t0) / max(len(image_names), 1)))
24 | 
--------------------------------------------------------------------------------
/external/.gitignore:
--------------------------------------------------------------------------------
1 | bbox.c
2 | bbox.cpython-35m-x86_64-linux-gnu.so
3 | bbox.cpython-36m-x86_64-linux-gnu.so
4 |
5 | nms.c
6 | nms.cpython-35m-x86_64-linux-gnu.so
7 | nms.cpython-36m-x86_64-linux-gnu.so
8 |
--------------------------------------------------------------------------------
/external/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 |
--------------------------------------------------------------------------------
/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/__init__.py
--------------------------------------------------------------------------------
/external/nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | # ----------------------------------------------------------
9 | # Soft-NMS: Improving Object Detection With One Line of Code
10 | # Copyright (c) University of Maryland, College Park
11 | # Licensed under The MIT License [see LICENSE for details]
12 | # Written by Navaneeth Bodla and Bharat Singh
13 | # ----------------------------------------------------------
14 |
15 | import numpy as np
16 | cimport numpy as np
17 |
18 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
19 | return a if a >= b else b
20 |
21 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
22 | return a if a <= b else b
23 |
24 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
25 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
26 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
27 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
28 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
29 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
30 |
31 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
32 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
33 |
34 | cdef int ndets = dets.shape[0]
35 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
36 | np.zeros((ndets), dtype=np.int)
37 |
38 | # nominal indices
39 | cdef int _i, _j
40 | # sorted indices
41 | cdef int i, j
42 | # temp variables for box i's (the box currently under consideration)
43 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
44 | # variables for computing overlap with box j (lower scoring box)
45 | cdef np.float32_t xx1, yy1, xx2, yy2
46 | cdef np.float32_t w, h
47 | cdef np.float32_t inter, ovr
48 |
49 | keep = []
50 | for _i in range(ndets):
51 | i = order[_i]
52 | if suppressed[i] == 1:
53 | continue
54 | keep.append(i)
55 | ix1 = x1[i]
56 | iy1 = y1[i]
57 | ix2 = x2[i]
58 | iy2 = y2[i]
59 | iarea = areas[i]
60 | for _j in range(_i + 1, ndets):
61 | j = order[_j]
62 | if suppressed[j] == 1:
63 | continue
64 | xx1 = max(ix1, x1[j])
65 | yy1 = max(iy1, y1[j])
66 | xx2 = min(ix2, x2[j])
67 | yy2 = min(iy2, y2[j])
68 | w = max(0.0, xx2 - xx1 + 1)
69 | h = max(0.0, yy2 - yy1 + 1)
70 | inter = w * h
71 | ovr = inter / (iarea + areas[j] - inter)
72 | if ovr >= thresh:
73 | suppressed[j] = 1
74 |
75 | return keep
76 |
77 | def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
78 | cdef unsigned int N = boxes.shape[0]
79 | cdef float iw, ih, box_area
80 | cdef float ua
81 | cdef int pos = 0
82 | cdef float maxscore = 0
83 | cdef int maxpos = 0
84 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
85 |
86 | for i in range(N):
87 | maxscore = boxes[i, 4]
88 | maxpos = i
89 |
90 | tx1 = boxes[i,0]
91 | ty1 = boxes[i,1]
92 | tx2 = boxes[i,2]
93 | ty2 = boxes[i,3]
94 | ts = boxes[i,4]
95 |
96 | pos = i + 1
97 | # get max box
98 | while pos < N:
99 | if maxscore < boxes[pos, 4]:
100 | maxscore = boxes[pos, 4]
101 | maxpos = pos
102 | pos = pos + 1
103 |
104 | # add max box as a detection
105 | boxes[i,0] = boxes[maxpos,0]
106 | boxes[i,1] = boxes[maxpos,1]
107 | boxes[i,2] = boxes[maxpos,2]
108 | boxes[i,3] = boxes[maxpos,3]
109 | boxes[i,4] = boxes[maxpos,4]
110 |
111 | # swap ith box with position of max box
112 | boxes[maxpos,0] = tx1
113 | boxes[maxpos,1] = ty1
114 | boxes[maxpos,2] = tx2
115 | boxes[maxpos,3] = ty2
116 | boxes[maxpos,4] = ts
117 |
118 | tx1 = boxes[i,0]
119 | ty1 = boxes[i,1]
120 | tx2 = boxes[i,2]
121 | ty2 = boxes[i,3]
122 | ts = boxes[i,4]
123 |
124 | pos = i + 1
125 | # NMS iterations, note that N changes if detection boxes fall below threshold
126 | while pos < N:
127 | x1 = boxes[pos, 0]
128 | y1 = boxes[pos, 1]
129 | x2 = boxes[pos, 2]
130 | y2 = boxes[pos, 3]
131 | s = boxes[pos, 4]
132 |
133 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
134 | iw = (min(tx2, x2) - max(tx1, x1) + 1)
135 | if iw > 0:
136 | ih = (min(ty2, y2) - max(ty1, y1) + 1)
137 | if ih > 0:
138 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
139 | ov = iw * ih / ua #iou between max box and detection box
140 |
141 | if method == 1: # linear
142 | if ov > Nt:
143 | weight = 1 - ov
144 | else:
145 | weight = 1
146 | elif method == 2: # gaussian
147 | weight = np.exp(-(ov * ov)/sigma)
148 | else: # original NMS
149 | if ov > Nt:
150 | weight = 0
151 | else:
152 | weight = 1
153 |
154 | boxes[pos, 4] = weight*boxes[pos, 4]
155 |
156 | # if box score falls below threshold, discard the box by swapping with last box
157 | # update N
158 | if boxes[pos, 4] < threshold:
159 | boxes[pos,0] = boxes[N-1, 0]
160 | boxes[pos,1] = boxes[N-1, 1]
161 | boxes[pos,2] = boxes[N-1, 2]
162 | boxes[pos,3] = boxes[N-1, 3]
163 | boxes[pos,4] = boxes[N-1, 4]
164 | N = N - 1
165 | pos = pos - 1
166 |
167 | pos = pos + 1
168 |
169 | keep = [i for i in range(N)]
170 | return keep
171 |
172 | def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6):
173 | cdef unsigned int N = boxes.shape[0]
174 | cdef float iw, ih, box_area
175 | cdef float ua
176 | cdef int pos = 0
177 | cdef float maxscore = 0
178 | cdef int maxpos = 0
179 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
180 | cdef float mx1,mx2,my1,my2,mts,mbs,mw
181 |
182 | for i in range(N):
183 | maxscore = boxes[i, 4]
184 | maxpos = i
185 |
186 | tx1 = boxes[i,0]
187 | ty1 = boxes[i,1]
188 | tx2 = boxes[i,2]
189 | ty2 = boxes[i,3]
190 | ts = boxes[i,4]
191 |
192 | pos = i + 1
193 | # get max box
194 | while pos < N:
195 | if maxscore < boxes[pos, 4]:
196 | maxscore = boxes[pos, 4]
197 | maxpos = pos
198 | pos = pos + 1
199 |
200 | # add max box as a detection
201 | boxes[i,0] = boxes[maxpos,0]
202 | boxes[i,1] = boxes[maxpos,1]
203 | boxes[i,2] = boxes[maxpos,2]
204 | boxes[i,3] = boxes[maxpos,3]
205 | boxes[i,4] = boxes[maxpos,4]
206 |
207 | mx1 = boxes[i, 0] * boxes[i, 5]
208 | my1 = boxes[i, 1] * boxes[i, 5]
209 | mx2 = boxes[i, 2] * boxes[i, 6]
210 | my2 = boxes[i, 3] * boxes[i, 6]
211 | mts = boxes[i, 5]
212 | mbs = boxes[i, 6]
213 |
214 | # swap ith box with position of max box
215 | boxes[maxpos,0] = tx1
216 | boxes[maxpos,1] = ty1
217 | boxes[maxpos,2] = tx2
218 | boxes[maxpos,3] = ty2
219 | boxes[maxpos,4] = ts
220 |
221 | tx1 = boxes[i,0]
222 | ty1 = boxes[i,1]
223 | tx2 = boxes[i,2]
224 | ty2 = boxes[i,3]
225 | ts = boxes[i,4]
226 |
227 | pos = i + 1
228 | # NMS iterations, note that N changes if detection boxes fall below threshold
229 | while pos < N:
230 | x1 = boxes[pos, 0]
231 | y1 = boxes[pos, 1]
232 | x2 = boxes[pos, 2]
233 | y2 = boxes[pos, 3]
234 | s = boxes[pos, 4]
235 |
236 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
237 | iw = (min(tx2, x2) - max(tx1, x1) + 1)
238 | if iw > 0:
239 | ih = (min(ty2, y2) - max(ty1, y1) + 1)
240 | if ih > 0:
241 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
242 | ov = iw * ih / ua #iou between max box and detection box
243 |
244 | if method == 1: # linear
245 | if ov > Nt:
246 | weight = 1 - ov
247 | else:
248 | weight = 1
249 | elif method == 2: # gaussian
250 | weight = np.exp(-(ov * ov)/sigma)
251 | else: # original NMS
252 | if ov > Nt:
253 | weight = 0
254 | else:
255 | weight = 1
256 |
257 | mw = (1 - weight) ** weight_exp
258 | mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw
259 | my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw
260 | mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw
261 | my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw
262 | mts = mts + boxes[pos, 5] * mw
263 | mbs = mbs + boxes[pos, 6] * mw
264 |
265 | boxes[pos, 4] = weight*boxes[pos, 4]
266 |
267 | # if box score falls below threshold, discard the box by swapping with last box
268 | # update N
269 | if boxes[pos, 4] < threshold:
270 | boxes[pos,0] = boxes[N-1, 0]
271 | boxes[pos,1] = boxes[N-1, 1]
272 | boxes[pos,2] = boxes[N-1, 2]
273 | boxes[pos,3] = boxes[N-1, 3]
274 | boxes[pos,4] = boxes[N-1, 4]
275 | N = N - 1
276 | pos = pos - 1
277 |
278 | pos = pos + 1
279 |
280 | boxes[i, 0] = mx1 / mts
281 | boxes[i, 1] = my1 / mts
282 | boxes[i, 2] = mx2 / mbs
283 | boxes[i, 3] = my2 / mbs
284 |
285 | keep = [i for i in range(N)]
286 | return keep
287 |
--------------------------------------------------------------------------------
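
A usage sketch for the compiled extension, assuming `make` has been run in `external/` and that directory is on `sys.path`. Note that `soft_nms` rescores and reorders `boxes` in place and, per the Cython signature, requires `float32` input with columns `[x1, y1, x2, y2, score]`:

```
import numpy as np
from nms import soft_nms

dets = np.array([[ 10,  10,  60,  60, 0.9],
                 [ 12,  12,  62,  62, 0.8],    # heavy overlap with the first box
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)

keep = soft_nms(dets, Nt=0.3, method=2)  # method=2: Gaussian score decay
print(keep, dets[keep, 4])               # surviving indices and their decayed scores
```
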
/external/nms.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/nms.so
--------------------------------------------------------------------------------
/external/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from distutils.core import setup
3 | from distutils.extension import Extension
4 | from Cython.Build import cythonize
5 |
6 | extensions = [
7 | Extension(
8 | "nms",
9 | ["nms.pyx"],
10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
11 | )
12 | ]
13 |
14 | setup(
15 | name="coco",
16 | ext_modules=cythonize(extensions),
17 | include_dirs=[numpy.get_include()]
18 | )
19 |
--------------------------------------------------------------------------------
/models/CenterNet-104.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual
5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool
6 |
7 | class pool(nn.Module):
8 | def __init__(self, dim, pool1, pool2):
9 | super(pool, self).__init__()
10 | self.p1_conv1 = convolution(3, dim, 128)
11 | self.p2_conv1 = convolution(3, dim, 128)
12 |
13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
14 | self.p_bn1 = nn.BatchNorm2d(dim)
15 |
16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
17 | self.bn1 = nn.BatchNorm2d(dim)
18 | self.relu1 = nn.ReLU(inplace=True)
19 |
20 | self.conv2 = convolution(3, dim, dim)
21 |
22 | self.pool1 = pool1()
23 | self.pool2 = pool2()
24 |
25 | self.look_conv1 = convolution(3, dim, 128)
26 | self.look_conv2 = convolution(3, dim, 128)
27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
29 |
30 | def forward(self, x):
31 | # pool 1
32 | look_conv1 = self.look_conv1(x)
33 | p1_conv1 = self.p1_conv1(x)
34 | look_right = self.pool2(look_conv1)
35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right)
36 | pool1 = self.pool1(P1_look_conv)
37 |
38 | # pool 2
39 | look_conv2 = self.look_conv2(x)
40 | p2_conv1 = self.p2_conv1(x)
41 | look_down = self.pool1(look_conv2)
42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down)
43 | pool2 = self.pool2(P2_look_conv)
44 |
45 | # pool 1 + pool 2
46 | p_conv1 = self.p_conv1(pool1 + pool2)
47 | p_bn1 = self.p_bn1(p_conv1)
48 |
49 | conv1 = self.conv1(x)
50 | bn1 = self.bn1(conv1)
51 | relu1 = self.relu1(p_bn1 + bn1)
52 |
53 | conv2 = self.conv2(relu1)
54 | return conv2
55 |
56 | class pool_cross(nn.Module):
57 | def __init__(self, dim, pool1, pool2, pool3, pool4):
58 | super(pool_cross, self).__init__()
59 | self.p1_conv1 = convolution(3, dim, 128)
60 | self.p2_conv1 = convolution(3, dim, 128)
61 |
62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
63 | self.p_bn1 = nn.BatchNorm2d(dim)
64 |
65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
66 | self.bn1 = nn.BatchNorm2d(dim)
67 | self.relu1 = nn.ReLU(inplace=True)
68 |
69 | self.conv2 = convolution(3, dim, dim)
70 |
71 | self.pool1 = pool1()
72 | self.pool2 = pool2()
73 | self.pool3 = pool3()
74 | self.pool4 = pool4()
75 |
76 | def forward(self, x):
77 | # pool 1
78 | p1_conv1 = self.p1_conv1(x)
79 | pool1 = self.pool1(p1_conv1)
80 | pool1 = self.pool3(pool1)
81 |
82 | # pool 2
83 | p2_conv1 = self.p2_conv1(x)
84 | pool2 = self.pool2(p2_conv1)
85 | pool2 = self.pool4(pool2)
86 |
87 | # pool 1 + pool 2
88 | p_conv1 = self.p_conv1(pool1 + pool2)
89 | p_bn1 = self.p_bn1(p_conv1)
90 |
91 | conv1 = self.conv1(x)
92 | bn1 = self.bn1(conv1)
93 | relu1 = self.relu1(p_bn1 + bn1)
94 |
95 | conv2 = self.conv2(relu1)
96 | return conv2
97 |
98 | class tl_pool(pool):
99 | def __init__(self, dim):
100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool)
101 |
102 | class br_pool(pool):
103 | def __init__(self, dim):
104 | super(br_pool, self).__init__(dim, BottomPool, RightPool)
105 |
106 | class center_pool(pool_cross):
107 | def __init__(self, dim):
108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool)
109 |
110 | def make_tl_layer(dim):
111 | return tl_pool(dim)
112 |
113 | def make_br_layer(dim):
114 | return br_pool(dim)
115 |
116 | def make_ct_layer(dim):
117 | return center_pool(dim)
118 |
119 | def make_pool_layer(dim):
120 | return nn.Sequential()
121 |
122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
123 | layers = [layer(kernel, dim0, dim1, stride=2)]
124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
125 | return nn.Sequential(*layers)
126 |
127 | class model(kp):
128 | def __init__(self, db):
129 | n = 5
130 | dims = [256, 256, 384, 384, 384, 512]
131 | modules = [2, 2, 2, 2, 2, 4]
132 | out_dim = 1
133 |
134 | super(model, self).__init__(
135 | db, n, 2, dims, modules, out_dim,
136 | make_tl_layer=make_tl_layer,
137 | make_br_layer=make_br_layer,
138 | make_ct_layer=make_ct_layer,
139 | make_pool_layer=make_pool_layer,
140 | make_hg_layer=make_hg_layer,
141 | kp_layer=residual, cnv_dim=256
142 | )
143 |
144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss)
145 |
--------------------------------------------------------------------------------
/models/CenterNet-52.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual
5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool
6 |
7 | class pool(nn.Module):
8 | def __init__(self, dim, pool1, pool2):
9 | super(pool, self).__init__()
10 | self.p1_conv1 = convolution(3, dim, 128)
11 | self.p2_conv1 = convolution(3, dim, 128)
12 |
13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
14 | self.p_bn1 = nn.BatchNorm2d(dim)
15 |
16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
17 | self.bn1 = nn.BatchNorm2d(dim)
18 | self.relu1 = nn.ReLU(inplace=True)
19 |
20 | self.conv2 = convolution(3, dim, dim)
21 |
22 | self.pool1 = pool1()
23 | self.pool2 = pool2()
24 |
25 | self.look_conv1 = convolution(3, dim, 128)
26 | self.look_conv2 = convolution(3, dim, 128)
27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False)
29 |
30 | def forward(self, x):
31 | # pool 1
32 | look_conv1 = self.look_conv1(x)
33 | p1_conv1 = self.p1_conv1(x)
34 | look_right = self.pool2(look_conv1)
35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right)
36 | pool1 = self.pool1(P1_look_conv)
37 |
38 | # pool 2
39 | look_conv2 = self.look_conv2(x)
40 | p2_conv1 = self.p2_conv1(x)
41 | look_down = self.pool1(look_conv2)
42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down)
43 | pool2 = self.pool2(P2_look_conv)
44 |
45 | # pool 1 + pool 2
46 | p_conv1 = self.p_conv1(pool1 + pool2)
47 | p_bn1 = self.p_bn1(p_conv1)
48 |
49 | conv1 = self.conv1(x)
50 | bn1 = self.bn1(conv1)
51 | relu1 = self.relu1(p_bn1 + bn1)
52 |
53 | conv2 = self.conv2(relu1)
54 | return conv2
55 |
56 | class pool_cross(nn.Module):
57 | def __init__(self, dim, pool1, pool2, pool3, pool4):
58 | super(pool_cross, self).__init__()
59 | self.p1_conv1 = convolution(3, dim, 128)
60 | self.p2_conv1 = convolution(3, dim, 128)
61 |
62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False)
63 | self.p_bn1 = nn.BatchNorm2d(dim)
64 |
65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False)
66 | self.bn1 = nn.BatchNorm2d(dim)
67 | self.relu1 = nn.ReLU(inplace=True)
68 |
69 | self.conv2 = convolution(3, dim, dim)
70 |
71 | self.pool1 = pool1()
72 | self.pool2 = pool2()
73 | self.pool3 = pool3()
74 | self.pool4 = pool4()
75 |
76 | def forward(self, x):
77 | # pool 1
78 | p1_conv1 = self.p1_conv1(x)
79 | pool1 = self.pool1(p1_conv1)
80 | pool1 = self.pool3(pool1)
81 |
82 | # pool 2
83 | p2_conv1 = self.p2_conv1(x)
84 | pool2 = self.pool2(p2_conv1)
85 | pool2 = self.pool4(pool2)
86 |
87 | # pool 1 + pool 2
88 | p_conv1 = self.p_conv1(pool1 + pool2)
89 | p_bn1 = self.p_bn1(p_conv1)
90 |
91 | conv1 = self.conv1(x)
92 | bn1 = self.bn1(conv1)
93 | relu1 = self.relu1(p_bn1 + bn1)
94 |
95 | conv2 = self.conv2(relu1)
96 | return conv2
97 |
98 | class tl_pool(pool):
99 | def __init__(self, dim):
100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool)
101 |
102 | class br_pool(pool):
103 | def __init__(self, dim):
104 | super(br_pool, self).__init__(dim, BottomPool, RightPool)
105 |
106 | class center_pool(pool_cross):
107 | def __init__(self, dim):
108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool)
109 |
110 | def make_tl_layer(dim):
111 | return tl_pool(dim)
112 |
113 | def make_br_layer(dim):
114 | return br_pool(dim)
115 |
116 | def make_ct_layer(dim):
117 | return center_pool(dim)
118 |
119 | def make_pool_layer(dim):
120 | return nn.Sequential()
121 |
122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
123 | layers = [layer(kernel, dim0, dim1, stride=2)]
124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
125 | return nn.Sequential(*layers)
126 |
127 | class model(kp):
128 | def __init__(self, db):
129 | n = 5
130 | dims = [256, 256, 384, 384, 384, 512]
131 | modules = [2, 2, 2, 2, 2, 4]
132 |         out_dim = 1  # category count (pedestrian only)
133 |
134 | super(model, self).__init__(
135 | db, n, 1, dims, modules, out_dim,
136 | make_tl_layer=make_tl_layer,
137 | make_br_layer=make_br_layer,
138 | make_ct_layer=make_ct_layer,
139 | make_pool_layer=make_pool_layer,
140 | make_hg_layer=make_hg_layer,
141 | kp_layer=residual, cnv_dim=256
142 | )
143 |
144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss)
145 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/models/__init__.py
--------------------------------------------------------------------------------
/models/py_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .kp import kp, AELoss
2 | from .kp_utils import _neg_loss
3 |
4 | from .utils import convolution, fully_connected, residual
5 |
6 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool
7 |
--------------------------------------------------------------------------------
/models/py_utils/_cpools/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | cpools.egg-info/
3 | dist/
4 |
--------------------------------------------------------------------------------
/models/py_utils/_cpools/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from torch import nn
4 | from torch.autograd import Function
5 | import sys
6 | import os
7 | sys.path.append(os.path.join(os.path.dirname(__file__),'dist/cpools-0.0.0-py3.6-linux-x86_64.egg'))
8 | import top_pool, bottom_pool, left_pool, right_pool
9 |
10 | class TopPoolFunction(Function):
11 | @staticmethod
12 | def forward(ctx, input):
13 | output = top_pool.forward(input)[0]
14 | ctx.save_for_backward(input)
15 | return output
16 |
17 | @staticmethod
18 | def backward(ctx, grad_output):
19 | input = ctx.saved_variables[0]
20 | output = top_pool.backward(input, grad_output)[0]
21 | return output
22 |
23 | class BottomPoolFunction(Function):
24 | @staticmethod
25 | def forward(ctx, input):
26 | output = bottom_pool.forward(input)[0]
27 | ctx.save_for_backward(input)
28 | return output
29 |
30 | @staticmethod
31 | def backward(ctx, grad_output):
32 | input = ctx.saved_variables[0]
33 | output = bottom_pool.backward(input, grad_output)[0]
34 | return output
35 |
36 | class LeftPoolFunction(Function):
37 | @staticmethod
38 | def forward(ctx, input):
39 | output = left_pool.forward(input)[0]
40 | ctx.save_for_backward(input)
41 | return output
42 |
43 | @staticmethod
44 | def backward(ctx, grad_output):
45 | input = ctx.saved_variables[0]
46 | output = left_pool.backward(input, grad_output)[0]
47 | return output
48 |
49 | class RightPoolFunction(Function):
50 | @staticmethod
51 | def forward(ctx, input):
52 | output = right_pool.forward(input)[0]
53 | ctx.save_for_backward(input)
54 | return output
55 |
56 | @staticmethod
57 | def backward(ctx, grad_output):
58 | input = ctx.saved_variables[0]
59 | output = right_pool.backward(input, grad_output)[0]
60 | return output
61 |
62 | class TopPool(nn.Module):
63 | def forward(self, x):
64 | return TopPoolFunction.apply(x)
65 |
66 | class BottomPool(nn.Module):
67 | def forward(self, x):
68 | return BottomPoolFunction.apply(x)
69 |
70 | class LeftPool(nn.Module):
71 | def forward(self, x):
72 | return LeftPoolFunction.apply(x)
73 |
74 | class RightPool(nn.Module):
75 | def forward(self, x):
76 | return RightPoolFunction.apply(x)
77 |
--------------------------------------------------------------------------------
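
Each of the four pooling ops is a directional running maximum over an NCHW tensor. A NumPy sketch of the reference semantics, matching the loops in `src/bottom_pool.cpp` and `src/left_pool.cpp` below (the top and right variants are assumed to mirror them):

```
import numpy as np

x = np.random.rand(1, 2, 4, 4).astype(np.float32)  # NCHW

bottom = np.maximum.accumulate(x, axis=2)                                 # top -> bottom
top    = np.maximum.accumulate(x[:, :, ::-1], axis=2)[:, :, ::-1]         # bottom -> top
right  = np.maximum.accumulate(x, axis=3)                                 # left -> right
left   = np.maximum.accumulate(x[:, :, :, ::-1], axis=3)[:, :, :, ::-1]   # right -> left
```
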
/models/py_utils/_cpools/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CppExtension
3 |
4 | setup(
5 | name="cpools",
6 | ext_modules=[
7 | CppExtension("top_pool", ["src/top_pool.cpp"]),
8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]),
9 | CppExtension("left_pool", ["src/left_pool.cpp"]),
10 | CppExtension("right_pool", ["src/right_pool.cpp"])
11 | ],
12 | cmdclass={
13 | "build_ext": BuildExtension
14 | }
15 | )
16 |
--------------------------------------------------------------------------------
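
After `python setup.py install --user` (see the README), the extensions can be smoke-tested directly. A sketch, assuming a PyTorch 0.4.x environment; the forward pass works on CPU tensors, while the backward functions in `src/*.cpp` allocate CUDA buffers and therefore need a GPU:

```
import torch
import top_pool  # one of the four modules built by this setup.py

x = torch.rand(1, 1, 4, 4)
y = top_pool.forward(x)[0]  # the C++ functions return a one-element list of tensors
print(y.shape)              # torch.Size([1, 1, 4, 4])
```
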
/models/py_utils/_cpools/src/bottom_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 | at::Tensor input
7 | ) {
8 | // Initialize output
9 | at::Tensor output = at::zeros_like(input);
10 |
11 | // Get height
12 | int64_t height = input.size(2);
13 |
14 |     // Copy the first row
15 | at::Tensor input_temp = input.select(2, 0);
16 | at::Tensor output_temp = output.select(2, 0);
17 | output_temp.copy_(input_temp);
18 |
19 | at::Tensor max_temp;
20 | for (int64_t ind = 0; ind < height - 1; ++ind) {
21 | input_temp = input.select(2, ind + 1);
22 | output_temp = output.select(2, ind);
23 | max_temp = output.select(2, ind + 1);
24 |
25 | at::max_out(max_temp, input_temp, output_temp);
26 | }
27 |
28 | return {
29 | output
30 | };
31 | }
32 |
33 | std::vector<at::Tensor> pool_backward(
34 | at::Tensor input,
35 | at::Tensor grad_output
36 | ) {
37 | auto output = at::zeros_like(input);
38 |
39 | int32_t batch = input.size(0);
40 | int32_t channel = input.size(1);
41 | int32_t height = input.size(2);
42 | int32_t width = input.size(3);
43 |
44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 |
47 | auto input_temp = input.select(2, 0);
48 | max_val.copy_(input_temp);
49 |
50 | max_ind.fill_(0);
51 |
52 | auto output_temp = output.select(2, 0);
53 | auto grad_output_temp = grad_output.select(2, 0);
54 | output_temp.copy_(grad_output_temp);
55 |
56 | auto un_max_ind = max_ind.unsqueeze(2);
57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 | for (int32_t ind = 0; ind < height - 1; ++ind) {
60 | input_temp = input.select(2, ind + 1);
61 | at::gt_out(gt_mask, input_temp, max_val);
62 |
63 | at::masked_select_out(max_temp, input_temp, gt_mask);
64 | max_val.masked_scatter_(gt_mask, max_temp);
65 | max_ind.masked_fill_(gt_mask, ind + 1);
66 |
67 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
68 | output.scatter_add_(2, un_max_ind, grad_output_temp);
69 | }
70 |
71 | return {
72 | output
73 | };
74 | }
75 |
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 | m.def(
78 | "forward", &pool_forward, "Bottom Pool Forward",
79 | py::call_guard<py::gil_scoped_release>()
80 | );
81 | m.def(
82 | "backward", &pool_backward, "Bottom Pool Backward",
83 | py::call_guard<py::gil_scoped_release>()
84 | );
85 | }
86 |
--------------------------------------------------------------------------------
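For intuition, the forward scan above is a running maximum down the height axis, i.e. `output[i] = max(input[0..i])`. A pure-PyTorch reference for sanity-checking the extension (a sketch, assuming PyTorch >= 1.5 for `torch.cummax`):
```python
import torch

def bottom_pool_ref(x):
    # running max from the top row downwards along the height dimension
    return torch.cummax(x, dim=2).values

x = torch.tensor([2., 5., 1., 3.]).view(1, 1, 4, 1)
print(bottom_pool_ref(x).flatten().tolist())  # [2.0, 5.0, 5.0, 5.0]
```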
/models/py_utils/_cpools/src/left_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 |
3 | #include <vector>
4 |
5 | std::vector<at::Tensor> pool_forward(
6 | at::Tensor input
7 | ) {
8 | // Initialize output
9 | at::Tensor output = at::zeros_like(input);
10 |
11 | // Get width
12 | int64_t width = input.size(3);
13 |
14 | // Copy the last column
15 | at::Tensor input_temp = input.select(3, width - 1);
16 | at::Tensor output_temp = output.select(3, width - 1);
17 | output_temp.copy_(input_temp);
18 |
19 | at::Tensor max_temp;
20 | for (int64_t ind = 1; ind < width; ++ind) {
21 | input_temp = input.select(3, width - ind - 1);
22 | output_temp = output.select(3, width - ind);
23 | max_temp = output.select(3, width - ind - 1);
24 |
25 | at::max_out(max_temp, input_temp, output_temp);
26 | }
27 |
28 | return {
29 | output
30 | };
31 | }
32 |
33 | std::vector<at::Tensor> pool_backward(
34 | at::Tensor input,
35 | at::Tensor grad_output
36 | ) {
37 | auto output = at::zeros_like(input);
38 |
39 | int32_t batch = input.size(0);
40 | int32_t channel = input.size(1);
41 | int32_t height = input.size(2);
42 | int32_t width = input.size(3);
43 |
44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 |
47 | auto input_temp = input.select(3, width - 1);
48 | max_val.copy_(input_temp);
49 |
50 | max_ind.fill_(width - 1);
51 |
52 | auto output_temp = output.select(3, width - 1);
53 | auto grad_output_temp = grad_output.select(3, width - 1);
54 | output_temp.copy_(grad_output_temp);
55 |
56 | auto un_max_ind = max_ind.unsqueeze(3);
57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 | for (int32_t ind = 1; ind < width; ++ind) {
60 | input_temp = input.select(3, width - ind - 1);
61 | at::gt_out(gt_mask, input_temp, max_val);
62 |
63 | at::masked_select_out(max_temp, input_temp, gt_mask);
64 | max_val.masked_scatter_(gt_mask, max_temp);
65 | max_ind.masked_fill_(gt_mask, width - ind - 1);
66 |
67 | grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
68 | output.scatter_add_(3, un_max_ind, grad_output_temp);
69 | }
70 |
71 | return {
72 | output
73 | };
74 | }
75 |
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 | m.def(
78 | "forward", &pool_forward, "Left Pool Forward",
79 | py::call_guard<py::gil_scoped_release>()
80 | );
81 | m.def(
82 | "backward", &pool_backward, "Left Pool Backward",
83 | py::call_guard<py::gil_scoped_release>()
84 | );
85 | }
86 |
--------------------------------------------------------------------------------
/models/py_utils/_cpools/src/right_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 |
3 | #include <vector>
4 |
5 | std::vector<at::Tensor> pool_forward(
6 | at::Tensor input
7 | ) {
8 | // Initialize output
9 | at::Tensor output = at::zeros_like(input);
10 |
11 | // Get width
12 | int64_t width = input.size(3);
13 |
14 | // Copy the first column
15 | at::Tensor input_temp = input.select(3, 0);
16 | at::Tensor output_temp = output.select(3, 0);
17 | output_temp.copy_(input_temp);
18 |
19 | at::Tensor max_temp;
20 | for (int64_t ind = 0; ind < width - 1; ++ind) {
21 | input_temp = input.select(3, ind + 1);
22 | output_temp = output.select(3, ind);
23 | max_temp = output.select(3, ind + 1);
24 |
25 | at::max_out(max_temp, input_temp, output_temp);
26 | }
27 |
28 | return {
29 | output
30 | };
31 | }
32 |
33 | std::vector<at::Tensor> pool_backward(
34 | at::Tensor input,
35 | at::Tensor grad_output
36 | ) {
37 | at::Tensor output = at::zeros_like(input);
38 |
39 | int32_t batch = input.size(0);
40 | int32_t channel = input.size(1);
41 | int32_t height = input.size(2);
42 | int32_t width = input.size(3);
43 |
44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 |
47 | auto input_temp = input.select(3, 0);
48 | max_val.copy_(input_temp);
49 |
50 | max_ind.fill_(0);
51 |
52 | auto output_temp = output.select(3, 0);
53 | auto grad_output_temp = grad_output.select(3, 0);
54 | output_temp.copy_(grad_output_temp);
55 |
56 | auto un_max_ind = max_ind.unsqueeze(3);
57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 | for (int32_t ind = 0; ind < width - 1; ++ind) {
60 | input_temp = input.select(3, ind + 1);
61 | at::gt_out(gt_mask, input_temp, max_val);
62 |
63 | at::masked_select_out(max_temp, input_temp, gt_mask);
64 | max_val.masked_scatter_(gt_mask, max_temp);
65 | max_ind.masked_fill_(gt_mask, ind + 1);
66 |
67 | grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
68 | output.scatter_add_(3, un_max_ind, grad_output_temp);
69 | }
70 |
71 | return {
72 | output
73 | };
74 | }
75 |
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 | m.def(
78 | "forward", &pool_forward, "Right Pool Forward",
79 | py::call_guard<py::gil_scoped_release>()
80 | );
81 | m.def(
82 | "backward", &pool_backward, "Right Pool Backward",
83 | py::call_guard<py::gil_scoped_release>()
84 | );
85 | }
86 |
--------------------------------------------------------------------------------
/models/py_utils/_cpools/src/top_pool.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 |
3 | #include <vector>
4 |
5 | std::vector<at::Tensor> top_pool_forward(
6 | at::Tensor input
7 | ) {
8 | // Initialize output
9 | at::Tensor output = at::zeros_like(input);
10 |
11 | // Get height
12 | int64_t height = input.size(2);
13 |
14 | // Copy the last row
15 | at::Tensor input_temp = input.select(2, height - 1);
16 | at::Tensor output_temp = output.select(2, height - 1);
17 | output_temp.copy_(input_temp);
18 |
19 | at::Tensor max_temp;
20 | for (int64_t ind = 1; ind < height; ++ind) {
21 | input_temp = input.select(2, height - ind - 1);
22 | output_temp = output.select(2, height - ind);
23 | max_temp = output.select(2, height - ind - 1);
24 |
25 | at::max_out(max_temp, input_temp, output_temp);
26 | }
27 |
28 | return {
29 | output
30 | };
31 | }
32 |
33 | std::vector<at::Tensor> top_pool_backward(
34 | at::Tensor input,
35 | at::Tensor grad_output
36 | ) {
37 | auto output = at::zeros_like(input);
38 |
39 | int32_t batch = input.size(0);
40 | int32_t channel = input.size(1);
41 | int32_t height = input.size(2);
42 | int32_t width = input.size(3);
43 |
44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 |
47 | auto input_temp = input.select(2, height - 1);
48 | max_val.copy_(input_temp);
49 |
50 | max_ind.fill_(height - 1);
51 |
52 | auto output_temp = output.select(2, height - 1);
53 | auto grad_output_temp = grad_output.select(2, height - 1);
54 | output_temp.copy_(grad_output_temp);
55 |
56 | auto un_max_ind = max_ind.unsqueeze(2);
57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 | for (int32_t ind = 1; ind < height; ++ind) {
60 | input_temp = input.select(2, height - ind - 1);
61 | at::gt_out(gt_mask, input_temp, max_val);
62 |
63 | at::masked_select_out(max_temp, input_temp, gt_mask);
64 | max_val.masked_scatter_(gt_mask, max_temp);
65 | max_ind.masked_fill_(gt_mask, height - ind - 1);
66 |
67 | grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
68 | output.scatter_add_(2, un_max_ind, grad_output_temp);
69 | }
70 |
71 | return {
72 | output
73 | };
74 | }
75 |
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 | m.def(
78 | "forward", &top_pool_forward, "Top Pool Forward",
79 | py::call_guard<py::gil_scoped_release>()
80 | );
81 | m.def(
82 | "backward", &top_pool_backward, "Top Pool Backward",
83 | py::call_guard<py::gil_scoped_release>()
84 | );
85 | }
86 |
--------------------------------------------------------------------------------
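All four pooling layers are the same cumulative-max scan run in different directions; top and left simply scan in reverse. Reference versions of the remaining three directions, under the same `torch.cummax` assumption as the bottom-pool sketch above:
```python
import torch

def top_pool_ref(x):
    # max over each cell and everything below it: reverse rows, scan, reverse back
    return torch.cummax(x.flip(2), dim=2).values.flip(2)

def left_pool_ref(x):
    # max over each cell and everything to its right
    return torch.cummax(x.flip(3), dim=3).values.flip(3)

def right_pool_ref(x):
    # max over each cell and everything to its left
    return torch.cummax(x, dim=3).values
```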
/models/py_utils/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 | from .scatter_gather import scatter_kwargs
8 |
9 | class DataParallel(Module):
10 | r"""Implements data parallelism at the module level.
11 |
12 | This container parallelizes the application of the given module by
13 | splitting the input across the specified devices by chunking in the batch
14 | dimension. In the forward pass, the module is replicated on each device,
15 | and each replica handles a portion of the input. During the backwards
16 | pass, gradients from each replica are summed into the original module.
17 |
18 | The batch size should be larger than the number of GPUs used. It should
19 | also be an integer multiple of the number of GPUs so that each chunk is the
20 | same size (so that each GPU processes the same number of samples).
21 |
22 | See also: :ref:`cuda-nn-dataparallel-instead`
23 |
24 | Arbitrary positional and keyword inputs are allowed to be passed into
25 | DataParallel EXCEPT Tensors. All variables will be scattered on dim
26 | specified (default 0). Primitive types will be broadcasted, but all
27 | other types will be a shallow copy and can be corrupted if written to in
28 | the model's forward pass.
29 |
30 | Args:
31 | module: module to be parallelized
32 | device_ids: CUDA devices (default: all devices)
33 | output_device: device location of output (default: device_ids[0])
34 |
35 | Example::
36 |
37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
38 | >>> output = net(input_var)
39 | """
40 |
41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
42 |
43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
44 | super(DataParallel, self).__init__()
45 |
46 | if not torch.cuda.is_available():
47 | self.module = module
48 | self.device_ids = []
49 | return
50 |
51 | if device_ids is None:
52 | device_ids = list(range(torch.cuda.device_count()))
53 | if output_device is None:
54 | output_device = device_ids[0]
55 | self.dim = dim
56 | self.module = module
57 | self.device_ids = device_ids
58 | self.chunk_sizes = chunk_sizes
59 | self.output_device = output_device
60 | if len(self.device_ids) == 1:
61 | self.module.cuda(device_ids[0])
62 |
63 | def forward(self, *inputs, **kwargs):
64 | if not self.device_ids:
65 | return self.module(*inputs, **kwargs)
66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
67 | if len(self.device_ids) == 1:
68 | return self.module(*inputs[0], **kwargs[0])
69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
70 | outputs = self.parallel_apply(replicas, inputs, kwargs)
71 | return self.gather(outputs, self.output_device)
72 |
73 | def replicate(self, module, device_ids):
74 | return replicate(module, device_ids)
75 |
76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
78 |
79 | def parallel_apply(self, replicas, inputs, kwargs):
80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
81 |
82 | def gather(self, outputs, output_device):
83 | return gather(outputs, output_device, dim=self.dim)
84 |
85 |
86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
88 |
89 | This is the functional version of the DataParallel module.
90 |
91 | Args:
92 | module: the module to evaluate in parallel
93 | inputs: inputs to the module
94 | device_ids: GPU ids on which to replicate module
95 | output_device: GPU location of the output. Use -1 to indicate the CPU.
96 | (default: device_ids[0])
97 | Returns:
98 | a Variable containing the result of module(input) located on
99 | output_device
100 | """
101 | if not isinstance(inputs, tuple):
102 | inputs = (inputs,)
103 |
104 | if device_ids is None:
105 | device_ids = list(range(torch.cuda.device_count()))
106 |
107 | if output_device is None:
108 | output_device = device_ids[0]
109 |
110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
111 | if len(device_ids) == 1:
112 | return module(*inputs[0], **module_kwargs[0])
113 | used_device_ids = device_ids[:len(inputs)]
114 | replicas = replicate(module, used_device_ids)
115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
116 | return gather(outputs, output_device, dim)
117 |
--------------------------------------------------------------------------------
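The only difference from the stock `torch.nn.DataParallel` is the extra `chunk_sizes` argument, which allows an uneven batch split (useful when GPU 0 carries extra memory load, such as the gathered outputs). A hypothetical usage sketch:
```python
import torch.nn as nn
from models.py_utils.data_parallel import DataParallel

model = nn.Conv2d(3, 16, 3, padding=1)
# split a batch of 12 as 8 samples on GPU 0 and 4 on GPU 1
net = DataParallel(model, device_ids=[0, 1], chunk_sizes=[8, 4]).cuda()
```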
/models/py_utils/kp.py:
--------------------------------------------------------------------------------
1 | import pdb
2 | import torch
3 |
4 | import numpy as np
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from .utils import convolution, residual
9 | from .utils import make_layer, make_layer_revr
10 |
11 | from .kp_utils import _tranpose_and_gather_feat, _decode
12 | from .kp_utils import _sigmoid, _ae_loss, _regr_loss, _neg_loss
13 | from .kp_utils import make_tl_layer, make_br_layer, make_kp_layer, make_ct_layer
14 | from .kp_utils import make_pool_layer, make_unpool_layer
15 | from .kp_utils import make_merge_layer, make_inter_layer, make_cnv_layer
16 |
17 |
18 | class kp_module(nn.Module):
19 | def __init__(
20 | self, n, dims, modules, layer=residual,
21 | make_up_layer=make_layer, make_low_layer=make_layer,
22 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
23 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
24 | make_merge_layer=make_merge_layer, **kwargs
25 | ):
26 | super(kp_module, self).__init__()
27 |
28 | self.n = n
29 |
30 | curr_mod = modules[0]
31 | next_mod = modules[1]
32 |
33 | curr_dim = dims[0]
34 | next_dim = dims[1]
35 |
36 | self.up1 = make_up_layer(
37 | 3, curr_dim, curr_dim, curr_mod,
38 | layer=layer, **kwargs
39 | )
40 | self.max1 = make_pool_layer(curr_dim)
41 | self.low1 = make_hg_layer(
42 | 3, curr_dim, next_dim, curr_mod,
43 | layer=layer, **kwargs
44 | )
45 | self.low2 = kp_module(
46 | n - 1, dims[1:], modules[1:], layer=layer,
47 | make_up_layer=make_up_layer,
48 | make_low_layer=make_low_layer,
49 | make_hg_layer=make_hg_layer,
50 | make_hg_layer_revr=make_hg_layer_revr,
51 | make_pool_layer=make_pool_layer,
52 | make_unpool_layer=make_unpool_layer,
53 | make_merge_layer=make_merge_layer,
54 | **kwargs
55 | ) if self.n > 1 else \
56 | make_low_layer(
57 | 3, next_dim, next_dim, next_mod,
58 | layer=layer, **kwargs
59 | )
60 | self.low3 = make_hg_layer_revr(
61 | 3, next_dim, curr_dim, curr_mod,
62 | layer=layer, **kwargs
63 | )
64 | self.up2 = make_unpool_layer(curr_dim)
65 |
66 | self.merge = make_merge_layer(curr_dim)
67 |
68 | def forward(self, x):
69 | up1 = self.up1(x)
70 | max1 = self.max1(x)
71 | low1 = self.low1(max1)
72 | low2 = self.low2(low1)
73 | low3 = self.low3(low2)
74 | up2 = self.up2(low3)
75 | return self.merge(up1, up2)
76 |
77 |
78 | class kp(nn.Module):
79 | def __init__(
80 | self, db, n, nstack, dims, modules, out_dim, pre=None, cnv_dim=256,
81 | make_tl_layer=make_tl_layer, make_br_layer=make_br_layer, make_ct_layer=make_ct_layer,
82 | make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
83 | make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
84 | make_up_layer=make_layer, make_low_layer=make_layer,
85 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
86 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
87 | make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
88 | kp_layer=residual
89 | ):
90 | super(kp, self).__init__()
91 |
92 | self.nstack = nstack
93 | self._decode = _decode
94 | self._db = db
95 | self.K = self._db.configs["top_k"]
96 | self.ae_threshold = self._db.configs["ae_threshold"]
97 | self.kernel = self._db.configs["nms_kernel"]
98 | self.input_size = self._db.configs["input_size"][0]
99 | self.output_size = self._db.configs["output_sizes"][0][0]
100 |
101 | curr_dim = dims[0]
102 |
103 | self.pre = nn.Sequential(
104 | convolution(7, 3, 128, stride=2),
105 | residual(3, 128, 256, stride=2)
106 | ) if pre is None else pre
107 |
108 | self.kps = nn.ModuleList([
109 | kp_module(
110 | n, dims, modules, layer=kp_layer,
111 | make_up_layer=make_up_layer,
112 | make_low_layer=make_low_layer,
113 | make_hg_layer=make_hg_layer,
114 | make_hg_layer_revr=make_hg_layer_revr,
115 | make_pool_layer=make_pool_layer,
116 | make_unpool_layer=make_unpool_layer,
117 | make_merge_layer=make_merge_layer
118 | ) for _ in range(nstack)
119 | ])
120 | self.cnvs = nn.ModuleList([
121 | make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
122 | ])
123 |
124 | self.tl_cnvs = nn.ModuleList([
125 | make_tl_layer(cnv_dim) for _ in range(nstack)
126 | ])
127 | self.br_cnvs = nn.ModuleList([
128 | make_br_layer(cnv_dim) for _ in range(nstack)
129 | ])
130 |
131 | self.ct_cnvs = nn.ModuleList([
132 | make_ct_layer(cnv_dim) for _ in range(nstack)
133 | ])
134 |
135 | ## keypoint heatmaps
136 | self.tl_heats = nn.ModuleList([
137 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack)
138 | ])
139 | self.br_heats = nn.ModuleList([
140 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack)
141 | ])
142 |
143 | self.ct_heats = nn.ModuleList([
144 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack)
145 | ])
146 |
147 | ## tags
148 | self.tl_tags = nn.ModuleList([
149 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack)
150 | ])
151 | self.br_tags = nn.ModuleList([
152 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack)
153 | ])
154 |
155 | for tl_heat, br_heat, ct_heat in zip(self.tl_heats, self.br_heats, self.ct_heats):
156 | tl_heat[-1].bias.data.fill_(-2.19)
157 | br_heat[-1].bias.data.fill_(-2.19)
158 | ct_heat[-1].bias.data.fill_(-2.19)
159 |
160 | self.inters = nn.ModuleList([
161 | make_inter_layer(curr_dim) for _ in range(nstack - 1)
162 | ])
163 |
164 | self.inters_ = nn.ModuleList([
165 | nn.Sequential(
166 | nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
167 | nn.BatchNorm2d(curr_dim)
168 | ) for _ in range(nstack - 1)
169 | ])
170 | self.cnvs_ = nn.ModuleList([
171 | nn.Sequential(
172 | nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
173 | nn.BatchNorm2d(curr_dim)
174 | ) for _ in range(nstack - 1)
175 | ])
176 |
177 | self.tl_regrs = nn.ModuleList([
178 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack)
179 | ])
180 | self.br_regrs = nn.ModuleList([
181 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack)
182 | ])
183 | self.ct_regrs = nn.ModuleList([
184 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack)
185 | ])
186 |
187 | self.relu = nn.ReLU(inplace=True)
188 |
189 | def _train(self, *xs):
190 | image = xs[0]
191 | tl_inds = xs[1]
192 | br_inds = xs[2]
193 | ct_inds = xs[3]
194 |
195 | inter = self.pre(image)
196 | outs = []
197 |
198 | layers = zip(
199 | self.kps, self.cnvs,
200 | self.tl_cnvs, self.br_cnvs,
201 | self.ct_cnvs, self.tl_heats,
202 | self.br_heats, self.ct_heats,
203 | self.tl_tags, self.br_tags,
204 | self.tl_regrs, self.br_regrs,
205 | self.ct_regrs
206 | )
207 | for ind, layer in enumerate(layers):
208 | kp_, cnv_ = layer[0:2]
209 | tl_cnv_, br_cnv_ = layer[2:4]
210 | ct_cnv_, tl_heat_ = layer[4:6]
211 | br_heat_, ct_heat_ = layer[6:8]
212 | tl_tag_, br_tag_ = layer[8:10]
213 | tl_regr_, br_regr_ = layer[10:12]
214 | ct_regr_ = layer[12]
215 |
216 | kp = kp_(inter)
217 | cnv = cnv_(kp)
218 |
219 | tl_cnv = tl_cnv_(cnv)
220 | br_cnv = br_cnv_(cnv)
221 | ct_cnv = ct_cnv_(cnv)
222 |
223 | tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
224 | tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
225 | tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
226 |
227 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds)
228 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds)
229 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds)
230 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds)
231 | ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds)
232 | # tl_tag and br_tag are per-corner embedding scalars used to group matching top-left and bottom-right corners
233 |
234 | outs += [tl_heat, br_heat, ct_heat, tl_tag, br_tag, tl_regr, br_regr, ct_regr]
235 |
236 | if ind < self.nstack - 1:
237 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
238 | inter = self.relu(inter)
239 | inter = self.inters[ind](inter)
240 |
241 | return outs
242 |
243 | def _test(self, *xs, **kwargs):
244 | image = xs[0]
245 |
246 | inter = self.pre(image)
247 |
248 | outs = []
249 |
250 | layers = zip(
251 | self.kps, self.cnvs,
252 | self.tl_cnvs, self.br_cnvs,
253 | self.ct_cnvs, self.tl_heats,
254 | self.br_heats, self.ct_heats,
255 | self.tl_tags, self.br_tags,
256 | self.tl_regrs, self.br_regrs,
257 | self.ct_regrs
258 | )
259 | for ind, layer in enumerate(layers):
260 | kp_, cnv_ = layer[0:2]
261 | tl_cnv_, br_cnv_ = layer[2:4]
262 | ct_cnv_, tl_heat_ = layer[4:6]
263 | br_heat_, ct_heat_ = layer[6:8]
264 | tl_tag_, br_tag_ = layer[8:10]
265 | tl_regr_, br_regr_ = layer[10:12]
266 | ct_regr_ = layer[12]
267 |
268 | kp = kp_(inter)
269 | cnv = cnv_(kp)
270 |
271 | if ind == self.nstack - 1:
272 | tl_cnv = tl_cnv_(cnv)
273 | br_cnv = br_cnv_(cnv)
274 | ct_cnv = ct_cnv_(cnv)
275 |
276 | tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
277 | tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
278 | tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
279 |
280 | outs += [tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr,
281 | ct_heat, ct_regr]
282 |
283 | if ind < self.nstack - 1:
284 | inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
285 | inter = self.relu(inter)
286 | inter = self.inters[ind](inter)
287 |
288 | return self._decode(*outs[-8:], **kwargs)
289 |
290 | def forward(self, *xs, **kwargs):
291 | if len(xs) > 1:
292 | return self._train(*xs, **kwargs)
293 | return self._test(*xs, **kwargs)
294 |
295 |
296 | class AELoss(nn.Module):
297 | def __init__(self, pull_weight=1, push_weight=1, regr_weight=1, focal_loss=_neg_loss):
298 | super(AELoss, self).__init__()
299 |
300 | self.pull_weight = pull_weight
301 | self.push_weight = push_weight
302 | self.regr_weight = regr_weight
303 | self.focal_loss = focal_loss
304 | self.ae_loss = _ae_loss
305 | self.regr_loss = _regr_loss
306 |
307 | def forward(self, outs, targets):
308 | stride = 8
309 |
310 | tl_heats = outs[0::stride]
311 | br_heats = outs[1::stride]
312 | ct_heats = outs[2::stride]
313 | tl_tags = outs[3::stride]
314 | br_tags = outs[4::stride]
315 | tl_regrs = outs[5::stride]
316 | br_regrs = outs[6::stride]
317 | ct_regrs = outs[7::stride]
318 |
319 | gt_tl_heat = targets[0]
320 | gt_br_heat = targets[1]
321 | gt_ct_heat = targets[2]
322 | gt_mask = targets[3]
323 | gt_tl_regr = targets[4]
324 | gt_br_regr = targets[5]
325 | gt_ct_regr = targets[6]
326 |
327 | # focal loss
328 | focal_loss = 0
329 |
330 | tl_heats = [_sigmoid(t) for t in tl_heats]
331 | br_heats = [_sigmoid(b) for b in br_heats]
332 | ct_heats = [_sigmoid(c) for c in ct_heats]
333 |
334 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat)
335 | focal_loss += self.focal_loss(br_heats, gt_br_heat)
336 | focal_loss += self.focal_loss(ct_heats, gt_ct_heat)
337 |
338 | # tag loss
339 | pull_loss = 0
340 | push_loss = 0
341 |
342 | for tl_tag, br_tag in zip(tl_tags, br_tags):
343 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask)
344 | pull_loss += pull
345 | push_loss += push
346 | pull_loss = self.pull_weight * pull_loss
347 | push_loss = self.push_weight * push_loss
348 |
349 | regr_loss = 0
350 | for tl_regr, br_regr, ct_regr in zip(tl_regrs, br_regrs, ct_regrs):
351 | regr_loss += self.regr_loss(tl_regr, gt_tl_regr, gt_mask)
352 | regr_loss += self.regr_loss(br_regr, gt_br_regr, gt_mask)
353 | regr_loss += self.regr_loss(ct_regr, gt_ct_regr, gt_mask)
354 | regr_loss = self.regr_weight * regr_loss
355 |
356 | loss = (focal_loss + pull_loss + push_loss + regr_loss) / len(tl_heats)
357 | return loss.unsqueeze(0), (focal_loss / len(tl_heats)).unsqueeze(0), (pull_loss / len(tl_heats)).unsqueeze(0), (
358 | push_loss / len(tl_heats)).unsqueeze(0), (regr_loss / len(tl_heats)).unsqueeze(0)
359 |
--------------------------------------------------------------------------------
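`_train` appends eight tensors per hourglass stack, which is why `AELoss.forward` walks `outs` with a stride of 8. A self-contained illustration of the slicing with `nstack = 2` (integers stand in for the 16 tensors):
```python
outs = list(range(16))   # [tl_heat_0, br_heat_0, ct_heat_0, tl_tag_0, br_tag_0,
                         #  tl_regr_0, br_regr_0, ct_regr_0, tl_heat_1, ...]
tl_heats = outs[0::8]    # positions 0 and 8: the top-left heatmaps of both stacks
ct_regrs = outs[7::8]    # positions 7 and 15: the center regressions of both stacks
assert tl_heats == [0, 8] and ct_regrs == [7, 15]
```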
/models/py_utils/kp_utils.py:
--------------------------------------------------------------------------------
1 | import pdb
2 | import torch
3 | import torch.nn as nn
4 |
5 | from .utils import convolution, residual
6 |
7 |
8 | class MergeUp(nn.Module):
9 | def forward(self, up1, up2):
10 | return up1 + up2
11 |
12 |
13 | def make_merge_layer(dim):
14 | return MergeUp()
15 |
16 |
17 | def make_tl_layer(dim):
18 | return None
19 |
20 |
21 | def make_br_layer(dim):
22 | return None
23 |
24 |
25 | def make_ct_layer(dim):
26 | return None
27 |
28 |
29 | def make_pool_layer(dim):
30 | return nn.MaxPool2d(kernel_size=2, stride=2)
31 |
32 |
33 | def make_unpool_layer(dim):
34 | return nn.Upsample(scale_factor=2)
35 |
36 |
37 | def make_kp_layer(cnv_dim, curr_dim, out_dim):
38 | return nn.Sequential(
39 | convolution(3, cnv_dim, curr_dim, with_bn=False),
40 | nn.Conv2d(curr_dim, out_dim, (1, 1))
41 | )
42 |
43 |
44 | def make_inter_layer(dim):
45 | return residual(3, dim, dim)
46 |
47 |
48 | def make_cnv_layer(inp_dim, out_dim):
49 | return convolution(3, inp_dim, out_dim)
50 |
51 |
52 | def _gather_feat(feat, ind, mask=None):
53 | dim = feat.size(2)
54 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
55 | feat = feat.gather(1, ind)
56 | if mask is not None:
57 | mask = mask.unsqueeze(2).expand_as(feat)
58 | feat = feat[mask]
59 | feat = feat.view(-1, dim)
60 | return feat
61 |
62 |
63 | def _nms(heat, kernel=1):
64 | pad = (kernel - 1) // 2
65 |
66 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad)
67 | keep = (hmax == heat).float()
68 | return heat * keep
69 |
70 |
71 | def _tranpose_and_gather_feat(feat, ind):
72 | feat = feat.permute(0, 2, 3, 1).contiguous()
73 | feat = feat.view(feat.size(0), -1, feat.size(3))
74 | feat = _gather_feat(feat, ind)
75 | return feat
76 |
77 |
78 | def _topk(scores, K=20):
79 | batch, cat, height, width = scores.size()
80 |
81 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K)
82 |
83 | topk_clses = (topk_inds / (height * width)).int()
84 |
85 | topk_inds = topk_inds % (height * width)
86 | topk_ys = (topk_inds / width).int().float()
87 | topk_xs = (topk_inds % width).int().float()
88 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs
89 |
90 |
91 | def _decode(
92 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, ct_heat, ct_regr,
93 | K=100, kernel=1, ae_threshold=1, num_dets=1000
94 | ):
95 | batch, cat, height, width = tl_heat.size()
96 |
97 | tl_heat = torch.sigmoid(tl_heat)
98 | br_heat = torch.sigmoid(br_heat)
99 | ct_heat = torch.sigmoid(ct_heat)
100 |
101 | # perform nms on heatmaps
102 | tl_heat = _nms(tl_heat, kernel=kernel)
103 | br_heat = _nms(br_heat, kernel=kernel)
104 | ct_heat = _nms(ct_heat, kernel=kernel)
105 |
106 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K)
107 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K)
108 | ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = _topk(ct_heat, K=K)
109 |
110 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K)
111 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K)
112 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K)
113 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K)
114 | ct_ys = ct_ys.view(batch, 1, K).expand(batch, K, K)
115 | ct_xs = ct_xs.view(batch, 1, K).expand(batch, K, K)
116 |
117 | if tl_regr is not None and br_regr is not None:
118 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds)
119 | tl_regr = tl_regr.view(batch, K, 1, 2)
120 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds)
121 | br_regr = br_regr.view(batch, 1, K, 2)
122 | ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds)
123 | ct_regr = ct_regr.view(batch, 1, K, 2)
124 |
125 | tl_xs = tl_xs + tl_regr[..., 0]
126 | tl_ys = tl_ys + tl_regr[..., 1]
127 | br_xs = br_xs + br_regr[..., 0]
128 | br_ys = br_ys + br_regr[..., 1]
129 | ct_xs = ct_xs + ct_regr[..., 0]
130 | ct_ys = ct_ys + ct_regr[..., 1]
131 |
132 | # all possible boxes based on top k corners (ignoring class)
133 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3)
134 |
135 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds)
136 | tl_tag = tl_tag.view(batch, K, 1)
137 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds)
138 | br_tag = br_tag.view(batch, 1, K)
139 | dists = torch.abs(tl_tag - br_tag)
140 |
141 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K)
142 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K)
143 | scores = (tl_scores + br_scores) / 2
144 |
145 | # reject boxes based on classes
146 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K)
147 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K)
148 | cls_inds = (tl_clses != br_clses)
149 |
150 | # reject boxes based on distances
151 | dist_inds = (dists > ae_threshold)
152 |
153 | # reject boxes based on widths and heights
154 | width_inds = (br_xs < tl_xs)
155 | height_inds = (br_ys < tl_ys)
156 |
157 | scores[cls_inds] = -1
158 | scores[dist_inds] = -1
159 | scores[width_inds] = -1
160 | scores[height_inds] = -1
161 |
162 | scores = scores.view(batch, -1)
163 | scores, inds = torch.topk(scores, num_dets)
164 | scores = scores.unsqueeze(2)
165 |
166 | bboxes = bboxes.view(batch, -1, 4)
167 | bboxes = _gather_feat(bboxes, inds)
168 |
169 | # width = (bboxes[:,:,2] - bboxes[:,:,0]).unsqueeze(2)
170 | # height = (bboxes[:,:,3] - bboxes[:,:,1]).unsqueeze(2)
171 |
172 | clses = tl_clses.contiguous().view(batch, -1, 1)
173 | clses = _gather_feat(clses, inds).float()
174 |
175 | tl_scores = tl_scores.contiguous().view(batch, -1, 1)
176 | tl_scores = _gather_feat(tl_scores, inds).float()
177 | br_scores = br_scores.contiguous().view(batch, -1, 1)
178 | br_scores = _gather_feat(br_scores, inds).float()
179 |
180 | ct_xs = ct_xs[:, 0, :]
181 | ct_ys = ct_ys[:, 0, :]
182 |
183 | center = torch.cat([ct_xs.unsqueeze(2), ct_ys.unsqueeze(2), ct_clses.float().unsqueeze(2), ct_scores.unsqueeze(2)],
184 | dim=2)
185 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2)
186 | return detections, center
187 |
188 |
189 | def _neg_loss(preds, gt):
190 | pos_inds = gt.eq(1)
191 | neg_inds = gt.lt(1)
192 |
193 | neg_weights = torch.pow(1 - gt[neg_inds], 4)
194 |
195 | loss = 0
196 | for pred in preds:
197 | pos_pred = pred[pos_inds]
198 | neg_pred = pred[neg_inds]
199 |
200 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
201 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
202 |
203 | num_pos = pos_inds.float().sum()
204 | pos_loss = pos_loss.sum()
205 | neg_loss = neg_loss.sum()
206 |
207 | if pos_pred.nelement() == 0:
208 | loss = loss - neg_loss
209 | else:
210 | loss = loss - (pos_loss + neg_loss) / num_pos
211 | return loss
212 |
213 |
214 | def _sigmoid(x):
215 | x = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4)
216 | return x
217 |
218 |
219 | def _ae_loss(tag0, tag1, mask):
220 | num = mask.sum(dim=1, keepdim=True).float()
221 | tag0 = tag0.squeeze()
222 | tag1 = tag1.squeeze()
223 |
224 | tag_mean = (tag0 + tag1) / 2
225 |
226 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4)
227 | tag0 = tag0[mask].sum()
228 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4)
229 | tag1 = tag1[mask].sum()
230 | pull = tag0 + tag1
231 |
232 | mask = mask.unsqueeze(1) + mask.unsqueeze(2)
233 | mask = mask.eq(2)
234 | num = num.unsqueeze(2)
235 | num2 = (num - 1) * num
236 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2)
237 | dist = 1 - torch.abs(dist)
238 | dist = nn.functional.relu(dist, inplace=True)
239 | dist = dist - 1 / (num + 1e-4)
240 | dist = dist / (num2 + 1e-4)
241 | dist = dist[mask]
242 | push = dist.sum()
243 | return pull, push
244 |
245 |
246 | def _regr_loss(regr, gt_regr, mask):
247 | num = mask.float().sum()
248 | mask = mask.unsqueeze(2).expand_as(gt_regr)
249 |
250 | regr = regr[mask]
251 | gt_regr = gt_regr[mask]
252 |
253 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
254 | regr_loss = regr_loss / (num + 1e-4)
255 | return regr_loss
256 |
--------------------------------------------------------------------------------
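`_tranpose_and_gather_feat` reshapes a B x C x H x W feature map into B x (H*W) x C and gathers the rows named by `ind`, so each flattened corner index picks up its C-dimensional feature vector. A shape-level sketch (sizes are illustrative):
```python
import torch
from models.py_utils.kp_utils import _tranpose_and_gather_feat

feat = torch.randn(2, 256, 64, 64)          # B x C x H x W
ind  = torch.randint(0, 64 * 64, (2, 128))  # B x N flattened y * W + x positions
out  = _tranpose_and_gather_feat(feat, ind)
assert out.shape == (2, 128, 256)           # B x N x C
```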
/models/py_utils/scatter_gather.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.nn.parallel._functions import Scatter, Gather
4 |
5 |
6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None):
7 | r"""
8 | Slices variables into approximately equal chunks and
9 | distributes them across given GPUs. Duplicates
10 | references to objects that are not variables. Does not
11 | support Tensors.
12 | """
13 | def scatter_map(obj):
14 | if isinstance(obj, Variable):
15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter."
17 | if isinstance(obj, tuple):
18 | return list(zip(*map(scatter_map, obj)))
19 | if isinstance(obj, list):
20 | return list(map(list, zip(*map(scatter_map, obj))))
21 | if isinstance(obj, dict):
22 | return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
23 | return [obj for targets in target_gpus]
24 |
25 | return scatter_map(inputs)
26 |
27 |
28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
29 | r"""Scatter with support for kwargs dictionary"""
30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
32 | if len(inputs) < len(kwargs):
33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
34 | elif len(kwargs) < len(inputs):
35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
36 | inputs = tuple(inputs)
37 | kwargs = tuple(kwargs)
38 | return inputs, kwargs
39 |
--------------------------------------------------------------------------------
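`scatter_kwargs` pads whichever of `inputs`/`kwargs` comes out shorter so every replica receives a matching (args, kwargs) pair, and non-tensor objects are simply duplicated per GPU. A tensor-free illustration (nothing is actually moved to a device here):
```python
from models.py_utils.scatter_gather import scatter_kwargs

ins, kws = scatter_kwargs(("a",), {}, [0, 1])
assert ins == (("a",), ("a",))  # the string is replicated for both GPUs
assert kws == ({}, {})          # kwargs padded to match the two replicas
```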
/models/py_utils/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | # a convolution module consisting of a convolution layer, a BN layer and a ReLU activation;
5 | # with the default stride of 1, the output spatial size matches the input
6 | class convolution(nn.Module):
7 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
8 | super(convolution, self).__init__()
9 |
10 | pad = (k - 1) // 2
11 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
12 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
13 | self.relu = nn.ReLU(inplace=True)
14 |
15 | def forward(self, x):
16 | conv = self.conv(x)
17 | bn = self.bn(conv)
18 | relu = self.relu(bn)
19 | return relu
20 |
21 | # a fully-connected module containing a linear layer, an optional BN layer and a ReLU activation
22 | class fully_connected(nn.Module):
23 | def __init__(self, inp_dim, out_dim, with_bn=True):
24 | super(fully_connected, self).__init__()
25 | self.with_bn = with_bn
26 |
27 | self.linear = nn.Linear(inp_dim, out_dim)
28 | if self.with_bn:
29 | self.bn = nn.BatchNorm1d(out_dim)
30 | self.relu = nn.ReLU(inplace=True)
31 |
32 | def forward(self, x):
33 | linear = self.linear(x)
34 | bn = self.bn(linear) if self.with_bn else linear
35 | relu = self.relu(bn)
36 | return relu
37 |
38 | # a residual module with two branches: the main branch contains two convolution layers, while
39 | # the skip branch contains a 1x1 convolution whenever the stride is not 1 or the input
40 | # channel count differs from the output channel count
41 | class residual(nn.Module):
42 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
43 | super(residual, self).__init__()
44 |
45 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
46 | self.bn1 = nn.BatchNorm2d(out_dim)
47 | self.relu1 = nn.ReLU(inplace=True)
48 |
49 | self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
50 | self.bn2 = nn.BatchNorm2d(out_dim)
51 |
52 | self.skip = nn.Sequential(
53 | nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
54 | nn.BatchNorm2d(out_dim)
55 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential()
56 | self.relu = nn.ReLU(inplace=True)
57 |
58 | def forward(self, x):
59 | conv1 = self.conv1(x)
60 | bn1 = self.bn1(conv1)
61 | relu1 = self.relu1(bn1)
62 |
63 | conv2 = self.conv2(relu1)
64 | bn2 = self.bn2(conv2)
65 |
66 | skip = self.skip(x)
67 | return self.relu(bn2 + skip)
68 |
69 | # stacks `modules` layers; the default layer is the convolution module above
70 | # only the first layer changes the channel count from inp_dim to out_dim
71 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
72 | layers = [layer(k, inp_dim, out_dim, **kwargs)]
73 | for _ in range(1, modules):
74 | layers.append(layer(k, out_dim, out_dim, **kwargs))
75 | return nn.Sequential(*layers)
76 |
77 | # the reverse of `make_layer`: only the last layer changes the channel count
78 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
79 | layers = []
80 | for _ in range(modules - 1):
81 | layers.append(layer(k, inp_dim, inp_dim, **kwargs))
82 | layers.append(layer(k, inp_dim, out_dim, **kwargs))
83 | return nn.Sequential(*layers)
84 |
--------------------------------------------------------------------------------
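Concretely, `make_layer` changes the channel count in its first layer while `make_layer_revr` changes it in its last, so the hourglass down and up paths mirror each other. A sketch with illustrative dims:
```python
from models.py_utils.utils import make_layer, make_layer_revr, residual

down = make_layer(3, 256, 384, 2, layer=residual)       # residual(256->384), residual(384->384)
up   = make_layer_revr(3, 384, 256, 2, layer=residual)  # residual(384->384), residual(384->256)
```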
/nnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/nnet/__init__.py
--------------------------------------------------------------------------------
/nnet/py_factory.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 | import torch
4 | import importlib
5 | import torch.nn as nn
6 |
7 | from config import system_configs
8 | from models.py_utils.data_parallel import DataParallel
9 |
10 | torch.manual_seed(317)
11 |
12 | class Network(nn.Module):
13 | def __init__(self, model, loss):
14 | super(Network, self).__init__()
15 |
16 | self.model = model
17 | self.loss = loss
18 |
19 | def forward(self, xs, ys, **kwargs):
20 | preds = self.model(*xs, **kwargs)
21 | loss_kp = self.loss(preds, ys, **kwargs)
22 | return loss_kp
23 |
24 | # for model backward compatibility
25 | # previously model was wrapped by DataParallel module
26 | class DummyModule(nn.Module):
27 | def __init__(self, model):
28 | super(DummyModule, self).__init__()
29 | self.module = model
30 |
31 | def forward(self, *xs, **kwargs):
32 | return self.module(*xs, **kwargs)
33 |
34 |
35 | class NetworkFactory(object):
36 | def __init__(self, db):
37 | super(NetworkFactory, self).__init__()
38 |
39 | module_file = "models.{}".format(system_configs.snapshot_name)
40 | print("module_file: {}".format(module_file))
41 | nnet_module = importlib.import_module(module_file)
42 |
43 | self.model = DummyModule(nnet_module.model(db))
44 | self.loss = nnet_module.loss
45 | self.network = Network(self.model, self.loss)
46 | self.network = DataParallel(self.network, chunk_sizes=system_configs.chunk_sizes).cuda()
47 | self.load_cropped_pretrained_model("cache/nnet/CenterNet-52/CenterNet-52_480000.pkl")  # initialize from COCO-pretrained weights; heatmap heads are dropped
48 |
49 | total_params = 0
50 | for params in self.model.parameters():
51 | num_params = 1
52 | for x in params.size():
53 | num_params *= x
54 | total_params += num_params
55 | print("total parameters: {}".format(total_params))
56 |
57 | # self.fix_layers() # fix kps and prelayer
58 |
59 | if system_configs.opt_algo == "adam":
60 | self.optimizer = torch.optim.Adam(
61 | filter(lambda p: p.requires_grad, self.model.parameters())
62 | )
63 | elif system_configs.opt_algo == "sgd":
64 | self.optimizer = torch.optim.SGD(
65 | filter(lambda p: p.requires_grad, self.model.parameters()),
66 | lr=system_configs.learning_rate,
67 | momentum=0.9, weight_decay=0.0001
68 | )
69 | else:
70 | raise ValueError("unknown optimizer")
71 |
72 | def cuda(self):
73 | self.model.cuda()
74 |
75 | def load_cropped_pretrained_model(self, params_file):
76 | x = torch.load(params_file)
77 | params = {'module.model.%s'%k: v for k, v in x.items() if 'heats' not in k}
78 | self.network.load_state_dict(params, strict=False)
79 | print("load the cropped weights from COCO successfully.")
80 |
81 | def fix_layers(self):
82 | for m, v in self.network.named_parameters():
83 | if '.pre' in m or '.kps' in m:
84 | v.requires_grad = False
85 |
86 | def train_mode(self):
87 | self.network.train()
88 |
89 | def eval_mode(self):
90 | self.network.eval()
91 |
92 | def train(self, xs, ys, **kwargs):
93 | xs = [x for x in xs]
94 | ys = [y for y in ys]
95 |
96 | self.optimizer.zero_grad()
97 | loss_kp = self.network(xs, ys)
98 | loss = loss_kp[0]
99 | focal_loss = loss_kp[1]
100 | pull_loss = loss_kp[2]
101 | push_loss = loss_kp[3]
102 | regr_loss = loss_kp[4]
103 | loss = loss.mean()
104 | focal_loss = focal_loss.mean()
105 | pull_loss = pull_loss.mean()
106 | push_loss = push_loss.mean()
107 | regr_loss = regr_loss.mean()
108 | loss.backward()
109 | self.optimizer.step()
110 | return loss, focal_loss, pull_loss, push_loss, regr_loss
111 |
112 | def validate(self, xs, ys, **kwargs):
113 | with torch.no_grad():
114 | xs = [x.cuda(non_blocking=True) for x in xs]
115 | ys = [y.cuda(non_blocking=True) for y in ys]
116 |
117 | loss_kp = self.network(xs, ys)
118 | loss = loss_kp[0]
119 | focal_loss = loss_kp[1]
120 | pull_loss = loss_kp[2]
121 | push_loss = loss_kp[3]
122 | regr_loss = loss_kp[4]
123 | loss = loss.mean()
124 | return loss
125 |
126 | def test(self, xs, **kwargs):
127 | with torch.no_grad():
128 | xs = [x.cuda(non_blocking=True) for x in xs]
129 | return self.model(*xs, **kwargs)
130 |
131 | def set_lr(self, lr):
132 | print("setting learning rate to: {}".format(lr))
133 | for param_group in self.optimizer.param_groups:
134 | param_group["lr"] = lr
135 |
136 | def load_pretrained_params(self, pretrained_model):
137 | print("loading from {}".format(pretrained_model))
138 | with open(pretrained_model, "rb") as f:
139 | params = torch.load(f)
140 | self.model.load_state_dict(params)
141 |
142 | def load_params(self, iteration):
143 | cache_file = system_configs.snapshot_file.format(iteration)
144 | print("loading model from {}".format(cache_file))
145 | with open(cache_file, "rb") as f:
146 | params = torch.load(f)
147 | self.model.load_state_dict(params)
148 |
149 | def save_params(self, iteration):
150 | cache_file = system_configs.snapshot_file.format(iteration)
151 | print("saving model to {}".format(cache_file))
152 | with open(cache_file, "wb") as f:
153 | params = self.model.state_dict()
154 | torch.save(params, f)
155 |
--------------------------------------------------------------------------------
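`load_cropped_pretrained_model` keeps every COCO-pretrained weight except the heatmap heads (`tl_heats`, `br_heats`, `ct_heats`), whose output channels are sized for COCO's 80 classes rather than the single pedestrian class; `strict=False` then lets the freshly initialized heads coexist with the loaded backbone. A toy illustration of the filter (the keys here are hypothetical):
```python
keys = ["pre.0.conv.weight", "tl_heats.0.1.weight", "ct_heats.0.1.bias"]
kept = [k for k in keys if "heats" not in k]
assert kept == ["pre.0.conv.weight"]  # heatmap heads are dropped, backbone survives
```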
/sample/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/sample/__init__.py
--------------------------------------------------------------------------------
/sample/pedestrian.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import math
3 | import numpy as np
4 | import torch
5 | import random
6 | import string
7 |
8 | from config import system_configs
9 | from utils import crop_image, normalize_, color_jittering_, lighting_
10 | from .utils import random_crop, draw_gaussian, gaussian_radius
11 |
12 |
13 | def _full_image_crop(image, detections):
14 | detections = detections.copy()
15 | height, width = image.shape[0:2]
16 |
17 | max_hw = max(height, width)
18 | center = [height // 2, width // 2]
19 | size = [max_hw, max_hw]
20 |
21 | image, border, offset = crop_image(image, center, size)
22 | detections[:, 0:4:2] += border[2]
23 | detections[:, 1:4:2] += border[0]
24 | return image, detections
25 |
26 |
27 | def _resize_image(image, detections, size):
28 | detections = detections.copy()
29 | height, width = image.shape[0:2]
30 | new_height, new_width = size
31 |
32 | image = cv2.resize(image, (new_width, new_height))
33 |
34 | height_ratio = new_height / height
35 | width_ratio = new_width / width
36 | detections[:, 0:4:2] *= width_ratio
37 | detections[:, 1:4:2] *= height_ratio
38 | return image, detections
39 |
40 |
41 | def _clip_detections(image, detections):
42 | detections = detections.copy()
43 | height, width = image.shape[0:2]
44 |
45 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1)
46 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1)
47 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \
48 | ((detections[:, 3] - detections[:, 1]) > 0)
49 | detections = detections[keep_inds]
50 | return detections
51 |
52 |
53 | def kp_detection(db, k_ind, data_aug, debug):
54 | data_rng = system_configs.data_rng
55 | batch_size = system_configs.batch_size
56 |
57 | categories = db.configs["categories"]
58 | input_size = db.configs["input_size"]
59 | output_size = db.configs["output_sizes"][0]
60 |
61 | border = db.configs["border"]
62 | lighting = db.configs["lighting"]
63 | rand_crop = db.configs["rand_crop"]
64 | rand_color = db.configs["rand_color"]
65 | rand_scales = db.configs["rand_scales"]
66 | gaussian_bump = db.configs["gaussian_bump"]
67 | gaussian_iou = db.configs["gaussian_iou"]
68 | gaussian_rad = db.configs["gaussian_radius"]
69 |
70 | max_tag_len = 128
71 |
72 | # allocating memory
73 | images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
74 | tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
75 | br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
76 | ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
77 | tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
78 | br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
79 | ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
80 | tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
81 | br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
82 | ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
83 | tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
84 | tag_lens = np.zeros((batch_size,), dtype=np.int32)
85 |
86 | db_size = db.db_inds.size
87 | for b_ind in range(batch_size):
88 | if not debug and k_ind == 0:
89 | db.shuffle_inds()
90 |
91 | db_ind = db.db_inds[k_ind]
92 | k_ind = (k_ind + 1) % db_size
93 |
94 | # reading image
95 | image_file = db.image_file(db_ind)
96 | image = cv2.imread(image_file)
97 |
98 | # reading detections
99 | detections = db.detections(db_ind)
100 |
101 | # cropping an image randomly
102 | if not debug and rand_crop:
103 | image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
104 | else:
105 | image, detections = _full_image_crop(image, detections)
106 |
107 | image, detections = _resize_image(image, detections, input_size)
108 | detections = _clip_detections(image, detections)
109 |
110 | width_ratio = output_size[1] / input_size[1]
111 | height_ratio = output_size[0] / input_size[0]
112 |
113 | # flipping an image randomly
114 | if not debug and np.random.uniform() > 0.5:
115 | image[:] = image[:, ::-1, :]
116 | width = image.shape[1]
117 | detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
118 |
119 | if not debug:
120 | image = image.astype(np.float32) / 255.
121 | if rand_color:
122 | color_jittering_(data_rng, image)
123 | if lighting:
124 | lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
125 | normalize_(image, db.mean, db.std)
126 | images[b_ind] = image.transpose((2, 0, 1))
127 |
128 | for ind, detection in enumerate(detections):
129 | category = int(detection[-1]) - 1
130 | # category = 0
131 |
132 | xtl, ytl = detection[0], detection[1]
133 | xbr, ybr = detection[2], detection[3]
134 | xct, yct = (detection[2] + detection[0]) / 2., (detection[3] + detection[1]) / 2.
135 |
136 | fxtl = (xtl * width_ratio)
137 | fytl = (ytl * height_ratio)
138 | fxbr = (xbr * width_ratio)
139 | fybr = (ybr * height_ratio)
140 | fxct = (xct * width_ratio)
141 | fyct = (yct * height_ratio)
142 |
143 | xtl = int(fxtl)
144 | ytl = int(fytl)
145 | xbr = int(fxbr)
146 | ybr = int(fybr)
147 | xct = int(fxct)
148 | yct = int(fyct)
149 |
150 | if gaussian_bump:
151 | width = detection[2] - detection[0]
152 | height = detection[3] - detection[1]
153 |
154 | width = math.ceil(width * width_ratio)
155 | height = math.ceil(height * height_ratio)
156 |
157 | if gaussian_rad == -1:
158 | radius = gaussian_radius((height, width), gaussian_iou)
159 | radius = max(0, int(radius))
160 | else:
161 | radius = gaussian_rad
162 |
163 | draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
164 | draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
165 | draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte=5)
166 |
167 | else:
168 | tl_heatmaps[b_ind, category, ytl, xtl] = 1
169 | br_heatmaps[b_ind, category, ybr, xbr] = 1
170 | ct_heatmaps[b_ind, category, yct, xct] = 1
171 |
172 | tag_ind = tag_lens[b_ind]
173 | tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
174 | br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
175 | ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
176 | tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
177 | br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
178 | ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
179 | tag_lens[b_ind] += 1
180 |
181 | for b_ind in range(batch_size):
182 | tag_len = tag_lens[b_ind]
183 | tag_masks[b_ind, :tag_len] = 1
184 |
185 | images = torch.from_numpy(images)
186 | tl_heatmaps = torch.from_numpy(tl_heatmaps)
187 | br_heatmaps = torch.from_numpy(br_heatmaps)
188 | ct_heatmaps = torch.from_numpy(ct_heatmaps)
189 | tl_regrs = torch.from_numpy(tl_regrs)
190 | br_regrs = torch.from_numpy(br_regrs)
191 | ct_regrs = torch.from_numpy(ct_regrs)
192 | tl_tags = torch.from_numpy(tl_tags) # B x N, flattened y * W + x indices into the output map
193 | br_tags = torch.from_numpy(br_tags)
194 | ct_tags = torch.from_numpy(ct_tags)
195 | tag_masks = torch.from_numpy(tag_masks) # convenient for batched computation
196 |
197 | return {
198 | "xs": [images, tl_tags, br_tags, ct_tags],
199 | "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]
200 | }, k_ind
201 |
202 |
203 | def sample_data(db, k_ind, data_aug=True, debug=False):
204 | return globals()[system_configs.sampling_function](db, k_ind, data_aug, debug)
205 |
--------------------------------------------------------------------------------
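Each corner is stored in the `*_tags` arrays as a single flattened index `y * output_width + x`, which is exactly the layout `_tranpose_and_gather_feat` indexes into at training time. A worked example of the encoding:
```python
output_w = 128                 # output_size[1] in the code above
x, y = 10, 3
ind = y * output_w + x         # 3 * 128 + 10 = 394
assert (ind // output_w, ind % output_w) == (y, x)
```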
/sample/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | def gaussian2D(shape, sigma=1):
5 | m, n = [(ss - 1.) / 2. for ss in shape]
6 | y, x = np.ogrid[-m:m+1,-n:n+1]
7 |
8 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
9 | h[h < np.finfo(h.dtype).eps * h.max()] = 0
10 | return h
11 |
12 | def draw_gaussian(heatmap, center, radius, k=1, delte=6):  # delte sets the Gaussian width: sigma = diameter / delte
13 | diameter = 2 * radius + 1
14 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / delte)
15 |
16 | x, y = center
17 |
18 | height, width = heatmap.shape[0:2]
19 |
20 | left, right = min(x, radius), min(width - x, radius + 1)
21 | top, bottom = min(y, radius), min(height - y, radius + 1)
22 |
23 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
24 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right]
25 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)
26 |
27 | def gaussian_radius(det_size, min_overlap):
28 | height, width = det_size
29 |
30 | a1 = 1
31 | b1 = (height + width)
32 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
33 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1)
34 | r1 = (b1 + sq1) / 2
35 |
36 | a2 = 4
37 | b2 = 2 * (height + width)
38 | c2 = (1 - min_overlap) * width * height
39 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2)
40 | r2 = (b2 + sq2) / 2
41 |
42 | a3 = 4 * min_overlap
43 | b3 = -2 * min_overlap * (height + width)
44 | c3 = (min_overlap - 1) * width * height
45 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3)
46 | r3 = (b3 + sq3) / 2
47 | return min(r1, r2, r3)
48 |
49 | def _get_border(border, size):
50 | i = 1
51 | while size - border // i <= border // i:
52 | i *= 2
53 | return border // i
54 |
55 | def random_crop(image, detections, random_scales, view_size, border=64):
56 | view_height, view_width = view_size
57 | image_height, image_width = image.shape[0:2]
58 |
59 | scale = np.random.choice(random_scales)
60 | height = int(view_height * scale)
61 | width = int(view_width * scale)
62 |
63 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype)
64 |
65 | w_border = _get_border(border, image_width)
66 | h_border = _get_border(border, image_height)
67 |
68 | ctx = np.random.randint(low=w_border, high=image_width - w_border)
69 | cty = np.random.randint(low=h_border, high=image_height - h_border)
70 |
71 | x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, image_width)
72 | y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, image_height)
73 |
74 | left_w, right_w = ctx - x0, x1 - ctx
75 | top_h, bottom_h = cty - y0, y1 - cty
76 |
77 | # crop image
78 | cropped_ctx, cropped_cty = width // 2, height // 2
79 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w)
80 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h)
81 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]
82 |
83 | # crop detections
84 | cropped_detections = detections.copy()
85 | cropped_detections[:, 0:4:2] -= x0
86 | cropped_detections[:, 1:4:2] -= y0
87 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w
88 | cropped_detections[:, 1:4:2] += cropped_cty - top_h
89 |
90 | return cropped_image, cropped_detections
91 |
--------------------------------------------------------------------------------
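`gaussian_radius` computes a radius such that a corner displaced by up to that amount should still yield at least `min_overlap` IoU with the ground-truth box; the three quadratics correspond to the ways the shifted corners can sit relative to the box (both inward, both outward, one of each). A worked call (my computation, for a 40 x 20 box at `min_overlap = 0.7`):
```python
from sample.utils import gaussian_radius

r = gaussian_radius((40, 20), 0.7)  # min of the three roots, ~7.36 here
radius = max(0, int(r))             # -> 7, the radius used to draw the corner Gaussian
```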
/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import torch
4 | import pprint
5 | import argparse
6 | import importlib
7 | import numpy as np
8 |
9 | import matplotlib
10 | matplotlib.use("Agg")
11 |
12 | from config import system_configs
13 | from nnet.py_factory import NetworkFactory
14 | from db.datasets import datasets
15 |
16 | os.environ["CUDA_VISIBLE_DEVICES"] = '2'
17 | torch.backends.cudnn.benchmark = False
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser(description="Test CenterNet")
21 | parser.add_argument('--cfg_file', default='CenterNet-52', help='config file', type=str)
22 | parser.add_argument('--testiter', dest='testiter', help='test at iteration i',
23 | default=10000, type=int)
24 | parser.add_argument('--split', dest='split', help='which split to use',
25 | default='validation', type=str)
26 | parser.add_argument('--suffix', dest="suffix", default=None, type=str)
27 | parser.add_argument('--debug', action='store_true')
28 |
29 | args = parser.parse_args()
30 | return args
31 |
32 | def make_dirs(directories):
33 | for directory in directories:
34 | if not os.path.exists(directory):
35 | os.makedirs(directory)
36 |
37 | def test(db, split, testiter, debug=False, suffix=None):
38 | result_dir = system_configs.result_dir
39 | result_dir = os.path.join(result_dir, str(testiter), split)
40 |
41 | if suffix is not None:
42 | result_dir = os.path.join(result_dir, suffix)
43 |
44 | make_dirs([result_dir])
45 |
46 | test_iter = system_configs.max_iter if testiter is None else testiter
47 |     print("loading parameters at iteration: {}".format(test_iter))
48 |
49 | print("building network ...")
50 | nnet = NetworkFactory(db)
51 | print("loading parameters ...")
52 | nnet.load_params(test_iter)
53 |
54 | test_file = 'test.{}'.format(db.data)
55 | testing = importlib.import_module(test_file).testing
56 |
57 | nnet.cuda()
58 | nnet.eval_mode()
59 | testing(db, nnet, result_dir, debug=debug)
60 |
61 |
62 | if __name__ == '__main__':
63 | args = parse_args()
64 | if args.suffix is None:
65 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '.json')
66 | else:
67 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '-{}.json'.format(args.suffix))
68 | print("cfg file: {}".format(cfg_file))
69 |
70 | with open(cfg_file, "r") as f:
71 | configs = json.load(f)
72 |
73 | configs["system"]["snapshot_name"] = args.cfg_file
74 | system_configs.update_config(configs["system"])
75 |
76 | train_split = system_configs.train_split
77 | val_split = system_configs.val_split
78 |
79 | split = {
80 | "training": train_split,
81 | "validation": val_split,
82 | }[args.split]
83 |
84 | print("loading all datasets ...")
85 | dataset = system_configs.dataset
86 | print("split: {}".format(split))
87 | testing_db = datasets[dataset](configs["db"], split)
88 |
89 | print("system config...")
90 | pprint.pprint(system_configs.full)
91 |
92 | print("db config...")
93 | pprint.pprint(testing_db.configs)
94 |
95 | test(testing_db, args.split, args.testiter, args.debug, args.suffix)
96 |
97 |
--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/test/__init__.py
--------------------------------------------------------------------------------
/test/base.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | from nnet.py_factory import NetworkFactory
4 |
5 | class Base(object):
6 | def __init__(self, db, nnet, func, model=None):
7 | super(Base, self).__init__()
8 |
9 | self._db = db
10 | self._nnet = nnet
11 | self._func = func
12 |
13 | if model is not None:
14 | self._nnet.load_pretrained_params(model)
15 |
16 | self._nnet.cuda()
17 | self._nnet.eval_mode()
18 |
19 | def _inference(self, image, *args, **kwargs):
20 | return self._func(self._db, self._nnet, image.copy(), *args, **kwargs)
21 |
22 | def __call__(self, image, *args, **kwargs):
23 | categories = self._db.configs["categories"]
24 | bboxes = self._inference(image, *args, **kwargs)
25 |         return {'pedestrian': bboxes[j] for j in range(1, categories + 1)}  # single class: categories == 1, so this is just {'pedestrian': bboxes[1]}
26 |
27 | def load_cfg(cfg_file):
28 | with open(cfg_file, "r") as f:
29 | cfg = json.load(f)
30 |
31 | cfg_sys = cfg["system"]
32 | cfg_db = cfg["db"]
33 | return cfg_sys, cfg_db
34 |
35 | def load_nnet(db):  # NetworkFactory expects a dataset db (see test.py), not the system config
36 |     return NetworkFactory(db)
37 |
--------------------------------------------------------------------------------
/test/centernet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import pdb
4 | import json
5 | import copy
6 | import numpy as np
7 | import torch
8 |
9 | from PIL import Image, ImageDraw, ImageFont
10 | import matplotlib.pyplot as plt
11 | import matplotlib
12 |
13 | from tqdm import tqdm
14 | from config import system_configs
15 | from utils import crop_image, normalize_
16 | from external.nms import soft_nms, soft_nms_merge
17 |
18 | colours = np.random.rand(80, 3)
19 |
20 |
21 | def _rescale_dets(detections, ratios, borders, sizes):
22 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
23 | xs /= ratios[:, 1][:, None, None]
24 | ys /= ratios[:, 0][:, None, None]
25 | xs -= borders[:, 2][:, None, None]
26 | ys -= borders[:, 0][:, None, None]
27 | tx_inds = xs[:, :, 0] <= -5
28 | bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5
29 | ty_inds = ys[:, :, 0] <= -5
30 | by_inds = ys[:, :, 1] >= sizes[0, 0] + 5
31 |
32 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
33 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)
34 | detections[:, tx_inds[0, :], 4] = -1
35 | detections[:, bx_inds[0, :], 4] = -1
36 | detections[:, ty_inds[0, :], 4] = -1
37 | detections[:, by_inds[0, :], 4] = -1
38 |
39 |
40 | def save_image(data, fn):
41 | sizes = np.shape(data)
42 | height = float(sizes[0])
43 | width = float(sizes[1])
44 |
45 | fig = plt.figure()
46 | fig.set_size_inches(width / height, 1, forward=False)
47 | ax = plt.Axes(fig, [0., 0., 1., 1.])
48 | ax.set_axis_off()
49 | fig.add_axes(ax)
50 |
51 | ax.imshow(data)
52 | plt.savefig(fn, dpi=height)
53 | plt.close()
54 |
55 |
56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3):
57 | detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel)
58 | detections = detections.data.cpu().numpy()
59 | center = center.data.cpu().numpy()
60 | return detections, center
61 |
62 |
63 | def inference(db, nnet, image, decode_func=kp_decode):
64 | K = db.configs["top_k"]
65 | ae_threshold = db.configs["ae_threshold"]
66 | nms_kernel = db.configs["nms_kernel"]
67 |
68 | scales = db.configs["test_scales"]
69 | weight_exp = db.configs["weight_exp"]
70 | merge_bbox = db.configs["merge_bbox"]
71 | categories = db.configs["categories"]
72 | nms_threshold = db.configs["nms_threshold"]
73 | max_per_image = db.configs["max_per_image"]
74 | nms_algorithm = {
75 | "nms": 0,
76 | "linear_soft_nms": 1,
77 | "exp_soft_nms": 2
78 | }[db.configs["nms_algorithm"]]
79 |
80 | height, width = image.shape[0:2]
81 | detections, center_points = [], []
82 |
83 | for scale in scales:
84 | new_height = int(height * scale)
85 | new_width = int(width * scale)
86 | new_center = np.array([new_height // 2, new_width // 2])
87 |
88 | inp_height = new_height | 127
89 | inp_width = new_width | 127
90 |
91 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
92 | ratios = np.zeros((1, 2), dtype=np.float32)
93 | borders = np.zeros((1, 4), dtype=np.float32)
94 | sizes = np.zeros((1, 2), dtype=np.float32)
95 |
96 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
97 | height_ratio = out_height / inp_height
98 | width_ratio = out_width / inp_width
99 |
100 | resized_image = cv2.resize(image, (new_width, new_height))
101 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
102 |
103 | resized_image = resized_image / 255.
104 | normalize_(resized_image, db.mean, db.std)
105 |
106 | images[0] = resized_image.transpose((2, 0, 1))
107 | borders[0] = border
108 | sizes[0] = [int(height * scale), int(width * scale)]
109 | ratios[0] = [height_ratio, width_ratio]
110 |
111 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
112 | images = torch.from_numpy(images)
113 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
114 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses
115 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores
116 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip
117 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip
118 | dets = dets.reshape(1, -1, 8)
119 | center = center.reshape(1, -1, 4)
120 |
121 | _rescale_dets(dets, ratios, borders, sizes)
122 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image
123 | center[..., [1]] /= ratios[:, 0][:, None, None]
124 | center[..., [0]] -= borders[:, 2][:, None, None]
125 | center[..., [1]] -= borders[:, 0][:, None, None]
126 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
127 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
128 | dets[:, :, 0:4] /= scale
129 | center[:, :, 0:2] /= scale # remap to origin image
130 |
131 | if scale == 1:
132 | center_points.append(center)
133 | detections.append(dets)
134 |
135 | detections = np.concatenate(detections, axis=1)
136 | center_points = np.concatenate(center_points, axis=1)
137 |
138 | classes = detections[..., -1]
139 | classes = classes[0]
140 | detections = detections[0]
141 | center_points = center_points[0]
142 |
143 | valid_ind = detections[:, 4] > -1
144 | valid_detections = detections[valid_ind]
145 |
146 | box_width = valid_detections[:, 2] - valid_detections[:, 0]
147 | box_height = valid_detections[:, 3] - valid_detections[:, 1]
148 |
149 | s_ind = (box_width * box_height <= 22500)
150 | l_ind = (box_width * box_height > 22500)
151 |
152 | s_detections = valid_detections[s_ind]
153 | l_detections = valid_detections[l_ind]
154 | # trisection
155 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3
156 | s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3
157 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
158 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
159 |
160 | s_temp_score = copy.copy(s_detections[:, 4])
161 | s_detections[:, 4] = -1
162 |
163 | center_x = center_points[:, 0][:, np.newaxis]
164 | center_y = center_points[:, 1][:, np.newaxis]
165 | s_left_x = s_left_x[np.newaxis, :]
166 | s_right_x = s_right_x[np.newaxis, :]
167 | s_top_y = s_top_y[np.newaxis, :]
168 | s_bottom_y = s_bottom_y[np.newaxis, :]
169 | # located in center region
170 | ind_lx = (center_x - s_left_x) > 0
171 | ind_rx = (center_x - s_right_x) < 0
172 | ind_ty = (center_y - s_top_y) > 0
173 | ind_by = (center_y - s_bottom_y) < 0
174 | # same classes
175 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
176 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
177 | index_s_new_score = np.argmax(
178 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score],
179 | axis=0) # select the box having center located in the center region
180 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3
181 |
182 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
183 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
184 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
185 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
186 |
187 | l_temp_score = copy.copy(l_detections[:, 4])
188 | l_detections[:, 4] = -1
189 |
190 | center_x = center_points[:, 0][:, np.newaxis]
191 | center_y = center_points[:, 1][:, np.newaxis]
192 | l_left_x = l_left_x[np.newaxis, :]
193 | l_right_x = l_right_x[np.newaxis, :]
194 | l_top_y = l_top_y[np.newaxis, :]
195 | l_bottom_y = l_bottom_y[np.newaxis, :]
196 |
197 | ind_lx = (center_x - l_left_x) > 0
198 | ind_rx = (center_x - l_right_x) < 0
199 | ind_ty = (center_y - l_top_y) > 0
200 | ind_by = (center_y - l_bottom_y) < 0
201 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
202 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1
203 | index_l_new_score = np.argmax(
204 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
205 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3
206 |
207 | detections = np.concatenate([l_detections, s_detections], axis=0)
208 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores
209 | classes = detections[..., -1]
210 |
211 | # reject detections with negative scores
212 | keep_inds = (detections[:, 4] > -1)
213 | detections = detections[keep_inds]
214 | classes = classes[keep_inds]
215 |
216 | # soft_nms
217 | top_bboxes = {}
218 | for j in range(categories):
219 | keep_inds = (classes == j)
220 | top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
221 | if merge_bbox:
222 | soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp)
223 | else:
224 | soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm)
225 | top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5]
226 |
227 | scores = np.hstack([top_bboxes[j][:, -1] for j in range(1, categories + 1)])
228 | # select boxes
229 | if len(scores) > max_per_image:
230 | kth = len(scores) - max_per_image
231 | thresh = np.partition(scores, kth)[kth]
232 | for j in range(1, categories + 1):
233 | keep_inds = (top_bboxes[j][:, -1] >= thresh)
234 | top_bboxes[j] = top_bboxes[j][keep_inds]
235 |
236 | return top_bboxes
--------------------------------------------------------------------------------
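The small/large blocks above implement the central-region check from the CenterNet paper: a detection survives only if a predicted center point of the same class falls inside the central region of its box (the middle third for small boxes, the middle fifth for large ones), and its score is then re-weighted with that center's score. A toy check with hypothetical numbers:

```python
import numpy as np

# one small box (x1, y1, x2, y2); its central third is [30, 60] x [30, 60]
box = np.array([0., 0., 90., 90.])
left_x,  right_x  = (2 * box[0] + box[2]) / 3, (box[0] + 2 * box[2]) / 3
top_y,   bottom_y = (2 * box[1] + box[3]) / 3, (box[1] + 2 * box[3]) / 3

center = np.array([45., 50.])  # a predicted center point of the same class
inside = left_x < center[0] < right_x and top_y < center[1] < bottom_y
print(inside)  # True -> the box is kept and rescored as (2*score + ct_score)/3
```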
/test/detector.py:
--------------------------------------------------------------------------------
1 | from .base import Base, load_cfg, load_nnet
2 | from config import system_configs
3 | from db.datasets import datasets
4 | import pkg_resources
5 | import importlib
6 | import os
7 |
8 | _package_name = __name__
9 |
10 |
11 | def get_file_path(*paths):
12 | path = "/".join(paths)
13 | return pkg_resources.resource_filename(_package_name, path)
14 |
15 |
16 | class CenterNet(Base):
17 | def __init__(self, cfg_file, iter=10000, suffix=None):
18 | from test.centernet import inference
19 |
20 |         model = importlib.import_module('models.%s' % cfg_file).model
21 | if suffix is None:
22 | cfg_path = os.path.join(system_configs.config_dir, "%s.json" % cfg_file)
23 | else:
24 | cfg_path = os.path.join(system_configs.config_dir, "%s-%s.json" % (cfg_file, suffix))
25 | model_path = get_file_path("..", "cache", "nnet", cfg_file, "%s_%d.pkl" % (cfg_file, iter))
26 | cfg_sys, cfg_db = load_cfg(cfg_path)
27 | cfg_sys["snapshot_name"] = cfg_file
28 | system_configs.update_config(cfg_sys)
29 | dataset = system_configs.dataset
30 | train_split = system_configs.train_split
31 | val_split = system_configs.val_split
32 | test_split = system_configs.test_split
33 |
34 | split = {
35 | "training": train_split,
36 | "validation": val_split,
37 | "testing": test_split
38 |         }["validation"]  # the demo wrapper always evaluates on the validation split
39 |
40 | demo = datasets[dataset](cfg_db, split)
41 |
42 | centernet = load_nnet(demo)
43 | super(CenterNet, self).__init__(demo, centernet, inference, model=model_path)
44 |
45 |
46 |
--------------------------------------------------------------------------------
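A hedged usage sketch for the `CenterNet` wrapper above, wired to `draw_bboxes` from `test/vis_utils.py`; the checkpoint iteration, image path, and output filename are illustrative:

```python
import cv2
from test.detector import CenterNet
from test.vis_utils import draw_bboxes

detector = CenterNet("CenterNet-52", iter=10000)  # loads config + cached checkpoint

image = cv2.imread("data/demo/000001.jpg")
bboxes = detector(image)   # {'pedestrian': N x 5 array of (x1, y1, x2, y2, score)}

result = draw_bboxes(image, bboxes, thresh=0.5)
cv2.imwrite("demo_result.jpg", result)
```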
/test/pedestrian.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import pdb
4 | import json
5 | import copy
6 | import numpy as np
7 | import torch
8 |
9 | from PIL import Image, ImageDraw, ImageFont
10 | import matplotlib.pyplot as plt
11 | import matplotlib
12 |
13 | from tqdm import tqdm
14 | from config import system_configs
15 | from utils import crop_image, normalize_
16 | from external.nms import soft_nms, soft_nms_merge
17 |
18 | colours = np.random.rand(80, 3)
19 |
20 |
21 | def _rescale_dets(detections, ratios, borders, sizes):
22 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
23 | xs /= ratios[:, 1][:, None, None]
24 | ys /= ratios[:, 0][:, None, None]
25 | xs -= borders[:, 2][:, None, None]
26 | ys -= borders[:, 0][:, None, None]
27 | tx_inds = xs[:, :, 0] <= -5
28 | bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5
29 | ty_inds = ys[:, :, 0] <= -5
30 | by_inds = ys[:, :, 1] >= sizes[0, 0] + 5
31 |
32 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
33 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)
34 | detections[:, tx_inds[0, :], 4] = -1
35 | detections[:, bx_inds[0, :], 4] = -1
36 | detections[:, ty_inds[0, :], 4] = -1
37 | detections[:, by_inds[0, :], 4] = -1
38 |
39 |
40 | def save_image(data, fn):
41 | sizes = np.shape(data)
42 | height = float(sizes[0])
43 | width = float(sizes[1])
44 |
45 | fig = plt.figure()
46 | fig.set_size_inches(width / height, 1, forward=False)
47 | ax = plt.Axes(fig, [0., 0., 1., 1.])
48 | ax.set_axis_off()
49 | fig.add_axes(ax)
50 |
51 | ax.imshow(data)
52 | plt.savefig(fn, dpi=height)
53 | plt.close()
54 |
55 |
56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3):
57 | detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel)
58 | detections = detections.data.cpu().numpy()
59 | center = center.data.cpu().numpy()
60 | return detections, center
61 |
62 |
63 | def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode):
64 | debug_dir = os.path.join(result_dir, "debug")
65 | if not os.path.exists(debug_dir):
66 | os.makedirs(debug_dir)
67 |
68 | if db.split != "trainval":
69 | db_inds = db.db_inds[:100] if debug else db.db_inds
70 | else:
71 | db_inds = db.db_inds[:100] if debug else db.db_inds[:5000]
72 | num_images = db_inds.size
73 |
74 | K = db.configs["top_k"]
75 | ae_threshold = db.configs["ae_threshold"] # group corners
76 | nms_kernel = db.configs["nms_kernel"] # nms for corners
77 |
78 | scales = db.configs["test_scales"]
79 | weight_exp = db.configs["weight_exp"] # for softnms
80 | merge_bbox = db.configs["merge_bbox"]
81 | categories = db.configs["categories"]
82 | nms_threshold = db.configs["nms_threshold"]
83 | max_per_image = db.configs["max_per_image"]
84 | nms_algorithm = {
85 | "nms": 0,
86 | "linear_soft_nms": 1,
87 | "exp_soft_nms": 2
88 | }[db.configs["nms_algorithm"]]
89 |
90 | top_bboxes = {}
91 | for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"):
92 | db_ind = db_inds[ind]
93 |
94 | image_id = db.image_ids(db_ind)
95 | image_file = db.image_file(db_ind)
96 | image = cv2.imread(image_file)
97 |
98 | height, width = image.shape[0:2]
99 |
100 | detections = []
101 | center_points = []
102 |
103 | for scale in scales:
104 | new_height = int(height * scale)
105 | new_width = int(width * scale)
106 | new_center = np.array([new_height // 2, new_width // 2])
107 |
108 | inp_height = new_height | 127
109 | inp_width = new_width | 127
110 |
111 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32)
112 | ratios = np.zeros((1, 2), dtype=np.float32)
113 | borders = np.zeros((1, 4), dtype=np.float32)
114 | sizes = np.zeros((1, 2), dtype=np.float32)
115 |
116 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4
117 | height_ratio = out_height / inp_height
118 | width_ratio = out_width / inp_width
119 |
120 | resized_image = cv2.resize(image, (new_width, new_height))
121 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width])
122 |
123 | resized_image = resized_image / 255.
124 | normalize_(resized_image, db.mean, db.std)
125 |
126 | images[0] = resized_image.transpose((2, 0, 1))
127 | borders[0] = border
128 | sizes[0] = [int(height * scale), int(width * scale)]
129 | ratios[0] = [height_ratio, width_ratio]
130 |
131 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
132 | images = torch.from_numpy(images)
133 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel)
134 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses
135 |             # the leading 2 is the batch of the original plus its horizontally flipped copy
136 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores
137 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip
138 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip
139 | dets = dets.reshape(1, -1, 8)
140 | center = center.reshape(1, -1, 4)
141 |
142 | _rescale_dets(dets, ratios, borders, sizes)
143 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image
144 | center[..., [1]] /= ratios[:, 0][:, None, None]
145 | center[..., [0]] -= borders[:, 2][:, None, None]
146 | center[..., [1]] -= borders[:, 0][:, None, None]
147 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]])
148 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]])
149 | dets[:, :, 0:4] /= scale
150 | center[:, :, 0:2] /= scale # remap to origin image
151 |
152 | if scale == 1:
153 | center_points.append(center)
154 | detections.append(dets)
155 |
156 | detections = np.concatenate(detections, axis=1)
157 | center_points = np.concatenate(center_points, axis=1)
158 |
159 | classes = detections[..., -1]
160 | classes = classes[0]
161 | detections = detections[0] # N x 8
162 | center_points = center_points[0] # N x 4
163 |
164 | valid_ind = detections[:, 4] > -1
165 | valid_detections = detections[valid_ind]
166 |
167 | box_width = valid_detections[:, 2] - valid_detections[:, 0]
168 | box_height = valid_detections[:, 3] - valid_detections[:, 1]
169 |
170 | s_ind = (box_width * box_height <= 22500)
171 | l_ind = (box_width * box_height > 22500)
172 |
173 | s_detections = valid_detections[s_ind]
174 | l_detections = valid_detections[l_ind]
175 | # trisection
176 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3
177 | s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3
178 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3
179 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3
180 |
181 | s_temp_score = copy.copy(s_detections[:, 4])
182 | s_detections[:, 4] = -1
183 |
184 | center_x = center_points[:, 0][:, np.newaxis]
185 | center_y = center_points[:, 1][:, np.newaxis]
186 | s_left_x = s_left_x[np.newaxis, :]
187 | s_right_x = s_right_x[np.newaxis, :]
188 | s_top_y = s_top_y[np.newaxis, :]
189 | s_bottom_y = s_bottom_y[np.newaxis, :]
190 | # located in center region
191 | ind_lx = (center_x - s_left_x) > 0
192 | ind_rx = (center_x - s_right_x) < 0
193 | ind_ty = (center_y - s_top_y) > 0
194 | ind_by = (center_y - s_bottom_y) < 0
195 | # same classes
196 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0
197 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
198 | axis=0) == 1
199 | index_s_new_score = np.argmax(
200 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score],
201 | axis=0) # select the box having center located in the center region
202 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[
203 | index_s_new_score, 3]) / 3
204 |
205 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5
206 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5
207 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5
208 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5
209 |
210 | l_temp_score = copy.copy(l_detections[:, 4])
211 | l_detections[:, 4] = -1
212 |
213 | center_x = center_points[:, 0][:, np.newaxis]
214 | center_y = center_points[:, 1][:, np.newaxis]
215 | l_left_x = l_left_x[np.newaxis, :]
216 | l_right_x = l_right_x[np.newaxis, :]
217 | l_top_y = l_top_y[np.newaxis, :]
218 | l_bottom_y = l_bottom_y[np.newaxis, :]
219 |
220 | ind_lx = (center_x - l_left_x) > 0
221 | ind_rx = (center_x - l_right_x) < 0
222 | ind_ty = (center_y - l_top_y) > 0
223 | ind_by = (center_y - l_bottom_y) < 0
224 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0
225 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)),
226 | axis=0) == 1
227 | index_l_new_score = np.argmax(
228 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0)
229 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[
230 | index_l_new_score, 3]) / 3
231 |
232 | detections = np.concatenate([l_detections, s_detections], axis=0)
233 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores
234 | classes = detections[..., -1]
235 |
236 | # for i in range(detections.shape[0]):
237 | # box_width = detections[i,2]-detections[i,0]
238 | # box_height = detections[i,3]-detections[i,1]
239 | # if box_width*box_height<=22500 and detections[i,4]!=-1:
240 | # left_x = (2*detections[i,0]+1*detections[i,2])/3
241 | # right_x = (1*detections[i,0]+2*detections[i,2])/3
242 | # top_y = (2*detections[i,1]+1*detections[i,3])/3
243 | # bottom_y = (1*detections[i,1]+2*detections[i,3])/3
244 | # temp_score = copy.copy(detections[i,4])
245 | # detections[i,4] = -1
246 | # for j in range(center_points.shape[0]):
247 | # if (classes[i] == center_points[j,2])and \
248 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
249 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
250 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3
251 | # break
252 | # elif box_width*box_height > 22500 and detections[i,4]!=-1:
253 | # left_x = (3*detections[i,0]+2*detections[i,2])/5
254 | # right_x = (2*detections[i,0]+3*detections[i,2])/5
255 | # top_y = (3*detections[i,1]+2*detections[i,3])/5
256 | # bottom_y = (2*detections[i,1]+3*detections[i,3])/5
257 | # temp_score = copy.copy(detections[i,4])
258 | # detections[i,4] = -1
259 | # for j in range(center_points.shape[0]):
260 | # if (classes[i] == center_points[j,2])and \
261 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \
262 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)):
263 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3
264 | # break
265 |
266 | # reject detections with negative scores
267 | keep_inds = (detections[:, 4] > -1)
268 | detections = detections[keep_inds]
269 | classes = classes[keep_inds]
270 |
271 | # soft_nms
272 | top_bboxes[image_id] = {}
273 | for j in range(categories):
274 | keep_inds = (classes == j)
275 | top_bboxes[image_id][j + 1] = detections[keep_inds][:, 0:7].astype(np.float32)
276 | if merge_bbox:
277 | soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm,
278 | weight_exp=weight_exp)
279 | else:
280 | soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm)
281 | top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] # N x 5
282 |
283 | scores = np.hstack([
284 | top_bboxes[image_id][j][:, -1]
285 | for j in range(1, categories + 1)
286 | ])
287 |
288 |         # top_bboxes maps each image_id to a per-category dict of N x 5 boxes
289 | # select boxes
290 | if len(scores) > max_per_image:
291 | kth = len(scores) - max_per_image
292 | thresh = np.partition(scores, kth)[kth]
293 | for j in range(1, categories + 1):
294 | keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh)
295 | top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds]
296 |
297 | if debug:
298 | image_file = db.image_file(db_ind)
299 | image = cv2.imread(image_file)
300 | im = image[:, :, (2, 1, 0)]
301 | fig, ax = plt.subplots(figsize=(12, 12))
302 | fig = ax.imshow(im, aspect='equal')
303 | plt.axis('off')
304 | fig.axes.get_xaxis().set_visible(False)
305 | fig.axes.get_yaxis().set_visible(False)
306 | # bboxes = {}
307 | for j in range(1, categories + 1):
308 | keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4)
309 | cat_name = db.class_name(j)
310 | for bbox in top_bboxes[image_id][j][keep_inds]:
311 | bbox = bbox[0:4].astype(np.int32)
312 | xmin = bbox[0]
313 | ymin = bbox[1]
314 | xmax = bbox[2]
315 | ymax = bbox[3]
316 | # if (xmax - xmin) * (ymax - ymin) > 5184:
317 | ax.add_patch(
318 | plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=colours[j - 1],
319 | linewidth=4.0))
320 | ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name),
321 | bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5),
322 | fontsize=15, color='white', weight='bold')
323 |
324 | debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind))
325 | debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind))
326 | plt.savefig(debug_file1)
327 | plt.savefig(debug_file2)
328 | plt.close()
329 | # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
330 |
331 | result_json = os.path.join(result_dir, "results.json")
332 | detections = db.convert_to_dict(top_bboxes)
333 | # with open(result_json, "w") as f:
334 | # json.dump(detections, f)
335 |
336 | db.evaluate(detections)
337 | return 0
338 |
339 |
340 | def testing(db, nnet, result_dir, debug=False):
341 | return globals()[system_configs.sampling_function](db, nnet, result_dir, debug=debug)
342 |
--------------------------------------------------------------------------------
/test/vis_utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | def draw_bboxes(image, bboxes, font_size=0.5, thresh=0.5, colors=None):
5 | """Draws bounding boxes on an image.
6 |
7 | Args:
8 | image: An image in OpenCV format
9 | bboxes: A dictionary representing bounding boxes of different object
10 | categories, where the keys are the names of the categories and the
11 |             values are the bounding boxes. The bounding boxes of each category should be
12 | stored in a 2D NumPy array, where each row is a bounding box (x1, y1,
13 | x2, y2, score).
14 | font_size: (Optional) Font size of the category names.
15 | thresh: (Optional) Only bounding boxes with scores above the threshold
16 | will be drawn.
17 | colors: (Optional) Color of bounding boxes for each category. If it is
18 | not provided, this function will use random color for each category.
19 |
20 | Returns:
21 | An image with bounding boxes.
22 | """
23 |
24 | image = image.copy()
25 | for cat_name in bboxes:
26 | keep_inds = bboxes[cat_name][:, -1] > thresh
27 | cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 2)[0]
28 |
29 | if colors is None:
30 | color = np.random.random((3, )) * 0.6 + 0.4
31 | color = (color * 255).astype(np.int32).tolist()
32 | else:
33 | color = colors[cat_name]
34 |
35 | for bbox in bboxes[cat_name][keep_inds]:
36 | bbox = bbox[0:4].astype(np.int32)
37 | if bbox[1] - cat_size[1] - 2 < 0:
38 | cv2.rectangle(image,
39 | (bbox[0], bbox[1] + 2),
40 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2),
41 | color, -1
42 | )
43 | cv2.putText(image, cat_name,
44 | (bbox[0], bbox[1] + cat_size[1] + 2),
45 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1
46 | )
47 | else:
48 | cv2.rectangle(image,
49 | (bbox[0], bbox[1] - cat_size[1] - 2),
50 | (bbox[0] + cat_size[0], bbox[1] - 2),
51 | color, -1
52 | )
53 | cv2.putText(image, cat_name,
54 | (bbox[0], bbox[1] - 2),
55 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1
56 | )
57 | cv2.rectangle(image,
58 | (bbox[0], bbox[1]),
59 | (bbox[2], bbox[3]),
60 | color, 2
61 | )
62 | return image
63 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import json
4 | import torch
5 | import queue
6 | import pprint
7 | import argparse
8 | import importlib
9 | import threading
10 | import traceback
11 | import numpy as np
12 |
13 | from tqdm import tqdm
14 | from utils import stdout_to_tqdm
15 | from db.datasets import datasets
16 | from config import system_configs
17 | from nnet.py_factory import NetworkFactory
18 | from torch.multiprocessing import Process, Queue
19 |
20 | os.environ["CUDA_VISIBLE_DEVICES"] = '0,1'  # comma-separated with no spaces is the safe form
21 | torch.backends.cudnn.enabled = True
22 | torch.backends.cudnn.benchmark = True
23 |
24 |
25 | def parse_args():
26 | parser = argparse.ArgumentParser(description="Train CenterNet")
27 | parser.add_argument("--cfg_file", default='CenterNet-52', help="config file", type=str)
28 | parser.add_argument("--iter", dest="start_iter",
29 | help="train at iteration i",
30 | default=0, type=int)
31 | parser.add_argument("--threads", dest="threads", default=16, type=int)
32 |
33 | #args = parser.parse_args()
34 | args, unparsed = parser.parse_known_args()
35 | return args
36 |
37 | def prefetch_data(db, queue, sample_data, data_aug):
38 | ind = 0
39 | print("start prefetching data...")
40 | np.random.seed(os.getpid())
41 | while True:
42 | try:
43 | data, ind = sample_data(db, ind, data_aug=data_aug)
44 | queue.put(data)
45 | except Exception as e:
46 | traceback.print_exc()
47 | raise e
48 |
49 | def pin_memory(data_queue, pinned_data_queue, sema):
50 | while True:
51 | data = data_queue.get()
52 | data["xs"] = [x.pin_memory() for x in data["xs"]]
53 | data["ys"] = [y.pin_memory() for y in data["ys"]]
54 |
55 | pinned_data_queue.put(data)
56 | if sema.acquire(blocking=False):
57 | return
58 |
59 |
60 | def init_parallel_jobs(dbs, queue, fn, data_aug):
61 | tasks = [Process(target=prefetch_data, args=(db, queue, fn, data_aug)) for db in dbs]
62 | for task in tasks:
63 | task.daemon = True
64 | task.start()
65 | return tasks
66 |
67 |
68 | def train(training_dbs, validation_db, start_iter=0):
69 | learning_rate = system_configs.learning_rate
70 | max_iteration = system_configs.max_iter
71 | pretrained_model = system_configs.pretrain
72 | snapshot = system_configs.snapshot
73 | val_iter = system_configs.val_iter
74 | display = system_configs.display
75 | decay_rate = system_configs.decay_rate
76 | stepsize = system_configs.stepsize
77 |
78 | training_size = len(training_dbs[0].db_inds)
79 | validation_size = len(validation_db.db_inds)
80 |
81 | # queues storing data for training
82 | training_queue = Queue(system_configs.prefetch_size) # buffer size of prefetch data
83 | validation_queue = Queue(5)
84 |
85 | # queues storing pinned data for training
86 | pinned_training_queue = queue.Queue(system_configs.prefetch_size)
87 | pinned_validation_queue = queue.Queue(5)
88 |
89 | # load data sampling function
90 | data_file = "sample.{}".format(training_dbs[0].data)
91 | sample_data = importlib.import_module(data_file).sample_data
92 |
93 |     # allocate resources for parallel reading
94 | training_tasks = init_parallel_jobs(training_dbs, training_queue, sample_data, True)
95 | if val_iter:
96 | validation_tasks = init_parallel_jobs([validation_db], validation_queue, sample_data, False)
97 |
98 | training_pin_semaphore = threading.Semaphore()
99 | validation_pin_semaphore = threading.Semaphore()
100 | training_pin_semaphore.acquire()
101 | validation_pin_semaphore.acquire()
102 |
103 | training_pin_args = (training_queue, pinned_training_queue, training_pin_semaphore)
104 | training_pin_thread = threading.Thread(target=pin_memory, args=training_pin_args)
105 | training_pin_thread.daemon = True
106 | training_pin_thread.start()
107 |
108 | validation_pin_args = (validation_queue, pinned_validation_queue, validation_pin_semaphore)
109 | validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args)
110 | validation_pin_thread.daemon = True
111 | validation_pin_thread.start()
112 |
113 | print("building model...")
114 | nnet = NetworkFactory(training_dbs[0])
115 |
116 |
117 | if pretrained_model is not None:
118 | if not os.path.exists(pretrained_model):
119 | raise ValueError("pretrained model does not exist")
120 | print("loading from pretrained model")
121 | nnet.load_pretrained_params(pretrained_model)
122 |
123 | if start_iter:
124 | learning_rate /= (decay_rate ** (start_iter // stepsize))
125 |
126 | nnet.load_params(start_iter)
127 | nnet.set_lr(learning_rate)
128 | print("training starts from iteration {} with learning_rate {}".format(start_iter + 1, learning_rate))
129 | else:
130 | nnet.set_lr(learning_rate)
131 |
132 | print("training start...")
133 | nnet.cuda()
134 | nnet.train_mode()
135 |
136 | with stdout_to_tqdm() as save_stdout:
137 | for iteration in tqdm(range(start_iter+1, max_iteration+1), file=save_stdout, ncols=80):
138 | training = pinned_training_queue.get(block=True)
139 | training_loss, focal_loss, pull_loss, push_loss, regr_loss = nnet.train(**training)
140 |
141 | if display and iteration % display == 0:
142 | print("training loss at iteration {}: {}".format(iteration, training_loss.item()))
143 | print("focal loss at iteration {}: {}".format(iteration, focal_loss.item()))
144 | print("pull loss at iteration {}: {}".format(iteration, pull_loss.item()))
145 | print("push loss at iteration {}: {}".format(iteration, push_loss.item()))
146 | print("regr loss at iteration {}: {}".format(iteration, regr_loss.item()))
147 |
148 | del training_loss, focal_loss, pull_loss, push_loss, regr_loss
149 |
150 | if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
151 | nnet.eval_mode()
152 | validation = pinned_validation_queue.get(block=True)
153 | validation_loss = nnet.validate(**validation)
154 | print("validation loss at iteration {}: {}".format(iteration, validation_loss.item()))
155 | nnet.train_mode()
156 |
157 | if iteration % snapshot == 0:
158 | nnet.save_params(iteration)
159 |
160 | if iteration % stepsize == 0:
161 | learning_rate /= decay_rate
162 | nnet.set_lr(learning_rate)
163 |
164 | # sending signal to kill the thread
165 | training_pin_semaphore.release()
166 | validation_pin_semaphore.release()
167 |
168 | # terminating data fetching processes
169 | for training_task in training_tasks:
170 | training_task.terminate()
171 |     for validation_task in (validation_tasks if val_iter else []):  # validation_tasks only exists when val_iter is set
172 |         validation_task.terminate()
173 |
174 |
175 | if __name__=="__main__":
176 | args = parse_args()
177 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file+".json")
178 | with open(cfg_file, "r") as f:
179 | configs = json.load(f)
180 |
181 | configs["system"]["snapshot_name"] = args.cfg_file
182 | system_configs.update_config(configs["system"])
183 |
184 | train_split = system_configs.train_split
185 | val_split = system_configs.val_split
186 | print("loading all datasets ...")
187 | dataset = system_configs.dataset
188 | threads = args.threads
189 |
190 | print("using {} threads".format(threads))
191 | training_dbs = [datasets[dataset](configs["db"], train_split) for _ in range(threads)]
192 | validation_db = datasets[dataset](configs["db"], val_split)
193 |
194 | print("system config ...")
195 | pprint.pprint(system_configs.full)
196 |
197 | print("db config...")
198 | pprint.pprint(training_dbs[0].configs)
199 |
200 | print("len of db: {}".format(len(training_dbs[0].db_inds)))
201 | train(training_dbs, validation_db, args.start_iter)
202 |
203 |
--------------------------------------------------------------------------------
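The learning-rate handling in `train()` above is a plain step decay: the rate is divided by `decay_rate` every `stepsize` iterations, and resuming from `start_iter` replays the decays already taken. With illustrative numbers (not this repo's config):

```python
# step decay as used in train(): two full steps have passed at iteration 90000
learning_rate = 2.5e-4
decay_rate, stepsize = 10, 45000
start_iter = 90000

learning_rate /= decay_rate ** (start_iter // stepsize)
print(learning_rate)  # 2.5e-06
```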
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .tqdm import stdout_to_tqdm
2 |
3 | from .image import crop_image
4 | from .image import color_jittering_, lighting_, normalize_
5 |
--------------------------------------------------------------------------------
/utils/image.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import random
4 |
5 | def grayscale(image):
6 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
7 |
8 | def normalize_(image, mean, std):
9 | image -= mean
10 | image /= std
11 |
12 | def lighting_(data_rng, image, alphastd, eigval, eigvec):
13 | alpha = data_rng.normal(scale=alphastd, size=(3, ))
14 | image += np.dot(eigvec, eigval * alpha)
15 |
16 | def blend_(alpha, image1, image2):
17 | image1 *= alpha
18 | image2 *= (1 - alpha)
19 | image1 += image2
20 |
21 | def saturation_(data_rng, image, gs, gs_mean, var):
22 | alpha = 1. + data_rng.uniform(low=-var, high=var)
23 | blend_(alpha, image, gs[:, :, None])
24 |
25 | def brightness_(data_rng, image, gs, gs_mean, var):
26 | alpha = 1. + data_rng.uniform(low=-var, high=var)
27 | image *= alpha
28 |
29 | def contrast_(data_rng, image, gs, gs_mean, var):
30 | alpha = 1. + data_rng.uniform(low=-var, high=var)
31 | blend_(alpha, image, gs_mean)
32 |
33 | def color_jittering_(data_rng, image):
34 | functions = [brightness_, contrast_, saturation_]
35 | random.shuffle(functions)
36 |
37 | gs = grayscale(image)
38 | gs_mean = gs.mean()
39 | for f in functions:
40 | f(data_rng, image, gs, gs_mean, 0.4)
41 |
42 | def crop_image(image, center, size):
43 | cty, ctx = center
44 | height, width = size
45 | im_height, im_width = image.shape[0:2]
46 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype)
47 |
48 | x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width)
49 | y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height)
50 |
51 | left, right = ctx - x0, x1 - ctx
52 | top, bottom = cty - y0, y1 - cty
53 |
54 | cropped_cty, cropped_ctx = height // 2, width // 2
55 | y_slice = slice(cropped_cty - top, cropped_cty + bottom)
56 | x_slice = slice(cropped_ctx - left, cropped_ctx + right)
57 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]
58 |
59 | border = np.array([
60 | cropped_cty - top,
61 | cropped_cty + bottom,
62 | cropped_ctx - left,
63 | cropped_ctx + right
64 | ], dtype=np.float32)
65 |
66 | offset = np.array([
67 | cty - height // 2,
68 | ctx - width // 2
69 | ])
70 |
71 | return cropped_image, border, offset
72 |
--------------------------------------------------------------------------------
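A small sketch of `crop_image`'s padding behavior: the crop is taken around `center`, regions that fall outside the source image stay zero, and `border` records where the valid pixels landed inside the crop. Shapes here are illustrative:

```python
import numpy as np
from utils.image import crop_image

image = np.random.randint(0, 255, (100, 200, 3), dtype=np.uint8)
cropped, border, offset = crop_image(image, center=[50, 100], size=[128, 128])

print(cropped.shape)  # (128, 128, 3)
print(border)         # [top, bottom, left, right] of the valid region in the crop
print(offset)         # top-left corner of the crop in source-image coordinates
```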
/utils/tqdm.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | import contextlib
4 |
5 | from tqdm import tqdm
6 |
7 | class TqdmFile(object):
8 | dummy_file = None
9 | def __init__(self, dummy_file):
10 | self.dummy_file = dummy_file
11 |
12 | def write(self, x):
13 | if len(x.rstrip()) > 0:
14 | tqdm.write(x, file=self.dummy_file)
15 |
16 | @contextlib.contextmanager
17 | def stdout_to_tqdm():
18 | save_stdout = sys.stdout
19 | try:
20 | sys.stdout = TqdmFile(sys.stdout)
21 | yield save_stdout
22 | except Exception as exc:
23 | raise exc
24 | finally:
25 | sys.stdout = save_stdout
26 |
--------------------------------------------------------------------------------
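`stdout_to_tqdm` swaps `sys.stdout` for a thin wrapper that routes `print` through `tqdm.write`, so log lines don't tear the progress bar; the original stream is yielded so it can be handed to `tqdm` itself, as `train.py` does. A minimal usage sketch:

```python
from tqdm import tqdm
from utils.tqdm import stdout_to_tqdm

with stdout_to_tqdm() as save_stdout:
    for i in tqdm(range(100), file=save_stdout, ncols=80):
        if i % 25 == 0:
            print("step {}".format(i))  # rendered via tqdm.write, bar stays intact
```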