├── .idea ├── CenterNetPerson.iml └── modules.xml ├── README.md ├── conda_packagelist.txt ├── config.py ├── config ├── CenterNet-52-multi_scale.json └── CenterNet-52.json ├── data └── demo │ ├── 000001.jpg │ └── 000003.jpg ├── db ├── __init__.py ├── base.py ├── cityperson.py ├── datasets.py └── detection.py ├── demo.py ├── external ├── .gitignore ├── Makefile ├── __init__.py ├── nms.pyx ├── nms.so └── setup.py ├── models ├── CenterNet-104.py ├── CenterNet-52.py ├── __init__.py └── py_utils │ ├── __init__.py │ ├── _cpools │ ├── .gitignore │ ├── __init__.py │ ├── setup.py │ └── src │ │ ├── bottom_pool.cpp │ │ ├── left_pool.cpp │ │ ├── right_pool.cpp │ │ └── top_pool.cpp │ ├── data_parallel.py │ ├── kp.py │ ├── kp_utils.py │ ├── scatter_gather.py │ └── utils.py ├── nnet ├── __init__.py └── py_factory.py ├── sample ├── __init__.py ├── pedestrian.py └── utils.py ├── test.py ├── test ├── __init__.py ├── base.py ├── centernet.py ├── detector.py ├── pedestrian.py └── vis_utils.py ├── train.py └── utils ├── __init__.py ├── image.py └── tqdm.py /.idea/CenterNetPerson.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 13 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Pedestrian detection based on CenterNet 3 | 4 | In this repo, we re-train [CenterNet](https://github.com/Duankaiwen/CenterNet) on the CityPerson dataset to obtain a pedestrian detector. 5 | 6 | 7 | 8 | ## Preparation 9 | 10 | Please first install [Anaconda](https://anaconda.org) and create an Anaconda environment using the provided package list. 11 | ``` 12 | conda create --name CenterNet --file conda_packagelist.txt 13 | ``` 14 | 15 | After you create the environment, activate it. 16 | ``` 17 | source activate CenterNet 18 | ``` 19 | 20 | ## Compiling Corner Pooling Layers 21 | ``` 22 | cd <CenterNet dir>/models/py_utils/_cpools/ 23 | python setup.py install --user 24 | ``` 25 | 26 | ## Compiling NMS 27 | ``` 28 | cd <CenterNet dir>/external 29 | make 30 | ``` 31 | 32 | ## CityPerson dataset 33 | 34 | - Download the CityPerson image and label files: [images](https://www.cityscapes-dataset.com/file-handling/?packageID=3), [labels](https://www.cityscapes-dataset.com/file-handling/?packageID=28) 35 | - Create a symbolic link in `data` to your CityPerson data 36 | ``` 37 | ln -s /path/to/your/CityPerson data/ 38 | ``` 39 | 40 | ## Training and Evaluation 41 | To train CenterNet-52: 42 | ``` 43 | python train.py --cfg_file CenterNet-52 44 | ``` 45 | The default configuration in `config/CenterNet-52.json` assumes 2 GPUs (12 GB each) with a total batch size of 12; adjust these settings to match your hardware.
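For example, to train on a single 12 GB GPU you could halve the batch and keep a single per-GPU chunk. This is an illustrative edit of `config/CenterNet-52.json`, not a setting shipped with the repo; `chunk_sizes` lists how each batch is split across the available GPUs, so its entries should sum to `batch_size` (the two-GPU default uses `[6, 6]` for a batch of 12, and the multi-scale COCO config uses `[6, 6, 6, 6]` for a batch of 24).
```
{
    "system": {
        "batch_size": 6,
        "chunk_sizes": [6]
    }
}
```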
46 | 47 | To evaluate your detector: 48 | ``` 49 | python test.py --cfg_file CenterNet-52 --testiter #checkpoint_epoch 50 | ``` 51 | 52 | ## Demo 53 | The demo images are stored in `data/demo`. 54 | ``` 55 | python demo.py 56 | ``` -------------------------------------------------------------------------------- /conda_packagelist.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name <env> --file <this file> 3 | # platform: linux-64 4 | @EXPLICIT 5 | https://repo.continuum.io/pkgs/main/linux-64/blas-1.0-mkl.tar.bz2 6 | https://repo.continuum.io/pkgs/main/linux-64/bzip2-1.0.6-h9a117a8_4.tar.bz2 7 | https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2018.4.16-0.tar.bz2 8 | https://conda.anaconda.org/caffe2/linux-64/caffe2-cuda8.0-cudnn7-0.8.dev-py36_2018.05.14.tar.bz2 9 | https://repo.continuum.io/pkgs/main/linux-64/cairo-1.14.12-h7636065_2.tar.bz2 10 | https://repo.continuum.io/pkgs/main/linux-64/certifi-2018.4.16-py36_0.tar.bz2 11 | https://repo.continuum.io/pkgs/main/linux-64/cffi-1.11.5-py36h9745a5d_0.tar.bz2 12 | https://repo.continuum.io/pkgs/free/linux-64/cudatoolkit-8.0-3.tar.bz2 13 | https://repo.continuum.io/pkgs/main/linux-64/cycler-0.10.0-py36h93f1223_0.tar.bz2 14 | https://repo.continuum.io/pkgs/main/linux-64/dbus-1.13.2-h714fa37_1.tar.bz2 15 | https://repo.continuum.io/pkgs/main/linux-64/expat-2.2.5-he0dffb1_0.tar.bz2 16 | https://repo.continuum.io/pkgs/main/linux-64/ffmpeg-3.4-h7264315_0.tar.bz2 17 | https://repo.continuum.io/pkgs/main/linux-64/fontconfig-2.12.6-h49f89f6_0.tar.bz2 18 | https://repo.continuum.io/pkgs/free/linux-64/freeglut-2.8.1-0.tar.bz2 19 | https://repo.continuum.io/pkgs/main/linux-64/freetype-2.8-hab7d2ae_1.tar.bz2 20 | https://repo.continuum.io/pkgs/free/linux-64/future-0.16.0-py36_1.tar.bz2 21 | https://repo.continuum.io/pkgs/main/linux-64/gflags-2.2.1-hf484d3e_0.tar.bz2 22 | https://repo.continuum.io/pkgs/main/linux-64/glib-2.56.1-h000015b_0.tar.bz2 23 | https://repo.continuum.io/pkgs/main/linux-64/glog-0.3.5-hf484d3e_1.tar.bz2 24 | https://repo.continuum.io/pkgs/main/linux-64/graphite2-1.3.11-hf63cedd_1.tar.bz2 25 | https://repo.continuum.io/pkgs/main/linux-64/gst-plugins-base-1.14.0-hbbd80ab_1.tar.bz2 26 | https://repo.continuum.io/pkgs/main/linux-64/gstreamer-1.14.0-hb453b48_1.tar.bz2 27 | https://repo.continuum.io/pkgs/main/linux-64/h5py-2.8.0-py36hca9c191_0.tar.bz2 28 | https://repo.continuum.io/pkgs/main/linux-64/harfbuzz-1.7.6-h5f0a787_1.tar.bz2 29 | https://repo.continuum.io/pkgs/main/linux-64/hdf5-1.8.18-h6792536_1.tar.bz2 30 | https://repo.continuum.io/pkgs/main/linux-64/icu-58.2-h9c2bf20_1.tar.bz2 31 | https://repo.continuum.io/pkgs/main/linux-64/intel-openmp-2018.0.0-8.tar.bz2 32 | https://repo.continuum.io/pkgs/main/linux-64/jasper-2.0.14-h07fcdf6_0.tar.bz2 33 | https://repo.continuum.io/pkgs/main/linux-64/cython-0.26.1-py36h21c49d0_0.tar.bz2 34 | https://repo.continuum.io/pkgs/main/linux-64/jpeg-9b-h024ee3a_2.tar.bz2 35 | https://repo.continuum.io/pkgs/main/linux-64/kiwisolver-1.0.1-py36h764f252_0.tar.bz2 36 | https://repo.continuum.io/pkgs/main/linux-64/libedit-3.1-heed3624_0.tar.bz2 37 | https://repo.continuum.io/pkgs/main/linux-64/libffi-3.2.1-hd88cf55_4.tar.bz2 38 | https://repo.continuum.io/pkgs/main/linux-64/libgcc-ng-7.2.0-hdf63c60_3.tar.bz2 39 | https://repo.continuum.io/pkgs/main/linux-64/libgfortran-ng-7.2.0-hdf63c60_3.tar.bz2 40 | 
https://repo.continuum.io/pkgs/main/linux-64/libglu-9.0.0-h0c0bdc1_1.tar.bz2 41 | https://repo.continuum.io/pkgs/main/linux-64/libopus-1.2.1-hb9ed12e_0.tar.bz2 42 | https://repo.continuum.io/pkgs/main/linux-64/libpng-1.6.34-hb9fc6fc_0.tar.bz2 43 | https://repo.continuum.io/pkgs/main/linux-64/libprotobuf-3.5.2-h6f1eeef_0.tar.bz2 44 | https://repo.continuum.io/pkgs/main/linux-64/libstdcxx-ng-7.2.0-hdf63c60_3.tar.bz2 45 | https://repo.continuum.io/pkgs/main/linux-64/libtiff-4.0.9-h28f6b97_0.tar.bz2 46 | https://repo.continuum.io/pkgs/main/linux-64/libvpx-1.6.1-h888fd40_0.tar.bz2 47 | https://repo.continuum.io/pkgs/main/linux-64/libxcb-1.13-h1bed415_1.tar.bz2 48 | https://repo.continuum.io/pkgs/main/linux-64/libxml2-2.9.8-hf84eae3_0.tar.bz2 49 | https://repo.continuum.io/pkgs/main/linux-64/matplotlib-2.2.2-py36h0e671d2_1.tar.bz2 50 | https://repo.continuum.io/pkgs/main/linux-64/mkl-2018.0.2-1.tar.bz2 51 | https://repo.continuum.io/pkgs/main/linux-64/mkl_fft-1.0.1-py36h3010b51_0.tar.bz2 52 | https://repo.continuum.io/pkgs/main/linux-64/mkl_random-1.0.1-py36h629b387_0.tar.bz2 53 | https://repo.continuum.io/pkgs/main/linux-64/ncurses-6.0-h9df7e31_2.tar.bz2 54 | https://repo.continuum.io/pkgs/main/linux-64/ninja-1.8.2-py36h6bb024c_1.tar.bz2 55 | https://repo.continuum.io/pkgs/main/linux-64/numpy-1.14.3-py36hcd700cb_1.tar.bz2 56 | https://repo.continuum.io/pkgs/main/linux-64/numpy-base-1.14.3-py36h9be14a7_1.tar.bz2 57 | https://repo.continuum.io/pkgs/main/linux-64/olefile-0.45.1-py36_0.tar.bz2 58 | https://repo.continuum.io/pkgs/main/linux-64/opencv-3.3.1-py36h9248ab4_2.tar.bz2 59 | https://repo.continuum.io/pkgs/main/linux-64/openssl-1.0.2o-h20670df_0.tar.bz2 60 | https://repo.continuum.io/pkgs/main/linux-64/pcre-8.42-h439df22_0.tar.bz2 61 | https://repo.continuum.io/pkgs/main/linux-64/pillow-5.1.0-py36h3deb7b8_0.tar.bz2 62 | https://repo.continuum.io/pkgs/main/linux-64/pip-10.0.1-py36_0.tar.bz2 63 | https://repo.continuum.io/pkgs/main/linux-64/pixman-0.34.0-hceecf20_3.tar.bz2 64 | https://conda.anaconda.org/conda-forge/linux-64/protobuf-3.5.2-py36_0.tar.bz2 65 | https://repo.continuum.io/pkgs/main/linux-64/pycparser-2.18-py36hf9f622e_1.tar.bz2 66 | https://repo.continuum.io/pkgs/main/linux-64/pyparsing-2.2.0-py36hee85983_1.tar.bz2 67 | https://repo.continuum.io/pkgs/main/linux-64/pyqt-5.9.2-py36h751905a_0.tar.bz2 68 | https://repo.continuum.io/pkgs/main/linux-64/python-3.6.4-hc3d631a_3.tar.bz2 69 | https://repo.continuum.io/pkgs/main/linux-64/python-dateutil-2.7.2-py36_0.tar.bz2 70 | https://conda.anaconda.org/pytorch/linux-64/pytorch-0.4.1-py36_cuda8.0.61_cudnn7.1.2_1.tar.bz2 71 | https://repo.continuum.io/pkgs/main/linux-64/pytz-2018.4-py36_0.tar.bz2 72 | https://repo.continuum.io/pkgs/main/linux-64/pyyaml-3.12-py36hafb9ca4_1.tar.bz2 73 | https://repo.continuum.io/pkgs/main/linux-64/qt-5.9.5-h7e424d6_0.tar.bz2 74 | https://repo.continuum.io/pkgs/main/linux-64/readline-7.0-ha6073c6_4.tar.bz2 75 | https://repo.continuum.io/pkgs/main/linux-64/scikit-learn-0.19.1-py36h7aa7ec6_0.tar.bz2 76 | https://repo.continuum.io/pkgs/main/linux-64/scipy-1.1.0-py36hfc37229_0.tar.bz2 77 | https://repo.continuum.io/pkgs/main/linux-64/setuptools-39.1.0-py36_0.tar.bz2 78 | https://repo.continuum.io/pkgs/main/linux-64/sip-4.19.8-py36hf484d3e_0.tar.bz2 79 | https://repo.continuum.io/pkgs/main/linux-64/six-1.11.0-py36h372c433_1.tar.bz2 80 | https://repo.continuum.io/pkgs/main/linux-64/sqlite-3.23.1-he433501_0.tar.bz2 81 | https://repo.continuum.io/pkgs/main/linux-64/tk-8.6.7-hc745277_3.tar.bz2 82 | 
https://conda.anaconda.org/pytorch/linux-64/torchvision-0.2.1-py36_1.tar.bz2 83 | https://repo.continuum.io/pkgs/main/linux-64/tornado-5.0.2-py36_0.tar.bz2 84 | https://repo.continuum.io/pkgs/main/linux-64/tqdm-4.23.0-py36_0.tar.bz2 85 | https://repo.continuum.io/pkgs/main/linux-64/wheel-0.31.0-py36_0.tar.bz2 86 | https://repo.continuum.io/pkgs/main/linux-64/xz-5.2.3-h5e939de_4.tar.bz2 87 | https://repo.continuum.io/pkgs/main/linux-64/yaml-0.1.7-had09818_2.tar.bz2 88 | https://repo.continuum.io/pkgs/main/linux-64/zlib-1.2.11-ha838bed_2.tar.bz2 89 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | class Config: 5 | def __init__(self): 6 | self._configs = {} 7 | self._configs["dataset"] = None 8 | self._configs["sampling_function"] = "kp_detection" 9 | 10 | # Training Config 11 | self._configs["display"] = 50 12 | self._configs["snapshot"] = 5000 13 | self._configs["stepsize"] = 450000 14 | self._configs["learning_rate"] = 0.00025 15 | self._configs["decay_rate"] = 10 16 | self._configs["max_iter"] = 500000 17 | self._configs["val_iter"] = 100 18 | self._configs["batch_size"] = 1 19 | self._configs["snapshot_name"] = None 20 | self._configs["prefetch_size"] = 100 21 | self._configs["weight_decay"] = False 22 | self._configs["weight_decay_rate"] = 1e-5 23 | self._configs["weight_decay_type"] = "l2" 24 | self._configs["pretrain"] = None 25 | self._configs["opt_algo"] = "adam" 26 | self._configs["chunk_sizes"] = None 27 | 28 | # Directories 29 | self._configs["data_dir"] = "/data/zwzhou/Data/MOT17/MOT17Labels" 30 | self._configs["cache_dir"] = "cache" 31 | self._configs["config_dir"] = "config" 32 | self._configs["result_dir"] = "results" 33 | 34 | # Split 35 | self._configs["train_split"] = "trainval" 36 | self._configs["val_split"] = "minival" 37 | self._configs["test_split"] = "testdev" 38 | 39 | # Rng 40 | self._configs["data_rng"] = np.random.RandomState(123) 41 | self._configs["nnet_rng"] = np.random.RandomState(317) 42 | 43 | 44 | @property 45 | def chunk_sizes(self): 46 | return self._configs["chunk_sizes"] 47 | 48 | @property 49 | def train_split(self): 50 | return self._configs["train_split"] 51 | 52 | @property 53 | def val_split(self): 54 | return self._configs["val_split"] 55 | 56 | @property 57 | def test_split(self): 58 | return self._configs["test_split"] 59 | 60 | @property 61 | def full(self): 62 | return self._configs 63 | 64 | @property 65 | def sampling_function(self): 66 | return self._configs["sampling_function"] 67 | 68 | @property 69 | def data_rng(self): 70 | return self._configs["data_rng"] 71 | 72 | @property 73 | def nnet_rng(self): 74 | return self._configs["nnet_rng"] 75 | 76 | @property 77 | def opt_algo(self): 78 | return self._configs["opt_algo"] 79 | 80 | @property 81 | def weight_decay_type(self): 82 | return self._configs["weight_decay_type"] 83 | 84 | @property 85 | def prefetch_size(self): 86 | return self._configs["prefetch_size"] 87 | 88 | @property 89 | def pretrain(self): 90 | return self._configs["pretrain"] 91 | 92 | @property 93 | def weight_decay_rate(self): 94 | return self._configs["weight_decay_rate"] 95 | 96 | @property 97 | def weight_decay(self): 98 | return self._configs["weight_decay"] 99 | 100 | @property 101 | def result_dir(self): 102 | result_dir = os.path.join(self._configs["result_dir"], self.snapshot_name) 103 | if not os.path.exists(result_dir): 104 | 
os.makedirs(result_dir) 105 | return result_dir 106 | 107 | @property 108 | def dataset(self): 109 | return self._configs["dataset"] 110 | 111 | @property 112 | def snapshot_name(self): 113 | return self._configs["snapshot_name"] 114 | 115 | @property 116 | def snapshot_dir(self): 117 | snapshot_dir = os.path.join(self.cache_dir, "nnet", self.snapshot_name) 118 | 119 | if not os.path.exists(snapshot_dir): 120 | os.makedirs(snapshot_dir) 121 | 122 | return snapshot_dir 123 | 124 | @property 125 | def snapshot_file(self): 126 | snapshot_file = os.path.join(self.snapshot_dir, self.snapshot_name + "_{}.pkl") 127 | return snapshot_file 128 | 129 | @property 130 | def config_dir(self): 131 | return self._configs["config_dir"] 132 | 133 | @property 134 | def batch_size(self): 135 | return self._configs["batch_size"] 136 | 137 | @property 138 | def max_iter(self): 139 | return self._configs["max_iter"] 140 | 141 | @property 142 | def learning_rate(self): 143 | return self._configs["learning_rate"] 144 | 145 | @property 146 | def decay_rate(self): 147 | return self._configs["decay_rate"] 148 | 149 | @property 150 | def stepsize(self): 151 | return self._configs["stepsize"] 152 | 153 | @property 154 | def snapshot(self): 155 | return self._configs["snapshot"] 156 | 157 | @property 158 | def display(self): 159 | return self._configs["display"] 160 | 161 | @property 162 | def val_iter(self): 163 | return self._configs["val_iter"] 164 | 165 | @property 166 | def data_dir(self): 167 | return self._configs["data_dir"] 168 | 169 | @property 170 | def cache_dir(self): 171 | if not os.path.exists(self._configs["cache_dir"]): 172 | os.makedirs(self._configs["cache_dir"]) 173 | return self._configs["cache_dir"] 174 | 175 | def update_config(self, new): 176 | for key in new: 177 | if key in self._configs: 178 | self._configs[key] = new[key] 179 | 180 | system_configs = Config() 181 | -------------------------------------------------------------------------------- /config/CenterNet-52-multi_scale.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "MSCOCO", 4 | "batch_size": 24, 5 | "sampling_function": "kp_detection", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | "decay_rate": 10, 12 | 13 | "val_iter": 500, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 6, 17 | 18 | "max_iter": 480000, 19 | "stepsize": 450000, 20 | "snapshot": 5000, 21 | 22 | "chunk_sizes": [6,6,6,6], 23 | 24 | "data_dir": "./data" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [0.6, 1, 1.2], 43 | 44 | "top_k": 70, 45 | "categories": 80, 46 | "kp_categories": 1, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "merge_bbox": true, 51 | "weight_exp": 10, 52 | 53 | "max_per_image": 100 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /config/CenterNet-52.json: -------------------------------------------------------------------------------- 1 | { 2 | "system": { 3 | "dataset": "CityPerson", 4 | "batch_size": 12, 5 | "sampling_function": "kp_detection", 6 | 7 | "train_split": "trainval", 8 | "val_split": "minival", 9 | 10 | "learning_rate": 0.00025, 11 | 
"decay_rate": 10, 12 | 13 | "val_iter": 500, 14 | 15 | "opt_algo": "adam", 16 | "prefetch_size": 20, 17 | 18 | "max_iter": 400000, 19 | "stepsize": 350000, 20 | "snapshot": 10000, 21 | 22 | "chunk_sizes": [6, 6], 23 | 24 | "data_dir": "/data/zwzhou/Data/CityPerson" 25 | }, 26 | 27 | "db": { 28 | "rand_scale_min": 0.6, 29 | "rand_scale_max": 1.4, 30 | "rand_scale_step": 0.1, 31 | "rand_scales": null, 32 | 33 | "rand_crop": true, 34 | "rand_color": true, 35 | 36 | "border": 128, 37 | "gaussian_bump": true, 38 | 39 | "input_size": [511, 511], 40 | "output_sizes": [[128, 128]], 41 | 42 | "test_scales": [1], 43 | 44 | "top_k": 70, 45 | "categories": 1, 46 | "kp_categories": 1, 47 | "ae_threshold": 0.5, 48 | "nms_threshold": 0.5, 49 | 50 | "max_per_image": 100, 51 | "pretrained_model": "cache/nnet/CenterNet-52/CenterNet-52_480000.pkl" 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /data/demo/000001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000001.jpg -------------------------------------------------------------------------------- /data/demo/000003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/data/demo/000003.jpg -------------------------------------------------------------------------------- /db/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/db/__init__.py -------------------------------------------------------------------------------- /db/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | 5 | from config import system_configs 6 | 7 | class BASE(object): 8 | def __init__(self): 9 | self._split = None 10 | self._db_inds = [] 11 | self._image_ids = [] 12 | 13 | self._data = None 14 | self._image_hdf5 = None 15 | self._image_file = None 16 | self._image_hdf5_file = None 17 | 18 | self._mean = np.zeros((3, ), dtype=np.float32) 19 | self._std = np.ones((3, ), dtype=np.float32) 20 | self._eig_val = np.ones((3, ), dtype=np.float32) 21 | self._eig_vec = np.zeros((3, 3), dtype=np.float32) 22 | 23 | self._configs = {} 24 | self._train_cfg = {} 25 | self._model = {} 26 | self._configs["data_aug"] = True 27 | 28 | self._data_rng = None 29 | 30 | @property 31 | def data(self): 32 | if self._data is None: 33 | raise ValueError("data is not set") 34 | return self._data 35 | 36 | @property 37 | def configs(self): 38 | return self._configs 39 | 40 | @property 41 | def train_cfg(self): 42 | return self._train_cfg 43 | 44 | @property 45 | def model(self): 46 | return self._model 47 | 48 | @property 49 | def mean(self): 50 | return self._mean 51 | 52 | @property 53 | def std(self): 54 | return self._std 55 | 56 | @property 57 | def eig_val(self): 58 | return self._eig_val 59 | 60 | @property 61 | def eig_vec(self): 62 | return self._eig_vec 63 | 64 | @property 65 | def db_inds(self): 66 | return self._db_inds 67 | 68 | @property 69 | def split(self): 70 | return self._split 71 | 72 | def update_config(self, new): 73 | for key in new: 74 | if key in self._configs: 75 | self._configs[key] = new[key] 76 | 77 | def 
image_ids(self, ind): 78 | return self._image_ids[ind] 79 | 80 | def image_file(self, ind): 81 | if self._image_file is None: 82 | raise ValueError("Image path is not initialized") 83 | 84 | image_id = self._image_ids[ind] 85 | return self._image_file.format(image_id) 86 | 87 | def write_result(self, ind, all_bboxes, all_scores): 88 | pass 89 | 90 | def evaluate(self, name): 91 | pass 92 | 93 | def shuffle_inds(self, quiet=False): 94 | if self._data_rng is None: 95 | self._data_rng = np.random.RandomState(os.getpid()) 96 | 97 | if not quiet: 98 | print("shuffling indices...") 99 | rand_perm = self._data_rng.permutation(len(self._db_inds)) 100 | self._db_inds = self._db_inds[rand_perm] 101 | -------------------------------------------------------------------------------- /db/cityperson.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import pickle 4 | import numpy as np 5 | from tqdm import tqdm 6 | from config import system_configs 7 | from db.detection import DETECTION 8 | 9 | 10 | class CityPerson(DETECTION): 11 | def __init__(self, db_config, split): 12 | super(CityPerson, self).__init__(db_config) 13 | 14 | data_dir = system_configs.data_dir 15 | result_dir = system_configs.result_dir 16 | cache_dir = system_configs.cache_dir 17 | 18 | self._split = split 19 | self._dataset = { 20 | "trainval": "train", 21 | "minival": "val" 22 | }[self._split] 23 | 24 | self._image_dir = os.path.join(data_dir, "leftImg8bit") 25 | 26 | self._image_file = os.path.join(self._image_dir, "{}") 27 | 28 | self._anno_dir = os.path.join(data_dir, "gtBboxCityPersons") 29 | 30 | self._data = "pedestrian" # the sample function file 31 | self._mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32) 32 | self._std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32) 33 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32) 34 | self._eig_vec = np.array([ 35 | [-0.58752847, -0.69563484, 0.41340352], 36 | [-0.5832747, 0.00994535, -0.81221408], 37 | [-0.56089297, 0.71832671, 0.41158938] 38 | ], dtype=np.float32) 39 | 40 | self._cache_file = os.path.join(cache_dir, "cityperson_{}.pkl".format(self._split)) 41 | self._load_data() 42 | self._db_inds = np.arange(len(self._image_ids)) 43 | 44 | 45 | def _load_data(self): 46 | print("loading from cache file: {}".format(self._cache_file)) 47 | if not os.path.exists(self._cache_file): 48 | print("No cache file found...") 49 | self._extract_data() 50 | with open(self._cache_file, "wb") as f: 51 | pickle.dump([self._detections, self._image_ids], f) 52 | else: 53 | with open(self._cache_file, "rb") as f: 54 | self._detections, self._image_ids = pickle.load(f) 55 | 56 | def _extract_data(self): 57 | self._image_ids = [] 58 | self._detections = {} 59 | subsets = os.listdir(os.path.join(self._anno_dir, self._dataset)) #["frankfurt", "lindau", "munster"] 60 | for ss in subsets: 61 | anno_dir = '{}/{}'.format(self._dataset, ss) 62 | for anno in os.listdir(os.path.join(self._anno_dir, anno_dir)): 63 | anno_file = os.path.join(self._anno_dir, '{}/{}'.format(anno_dir, anno)) 64 | img_id = os.path.join(anno_dir, anno.replace("gtBboxCityPersons.json", "leftImg8bit.png")) 65 | self._image_ids.append(img_id) 66 | bboxes = [] 67 | with open(anno_file, 'r') as f: 68 | anno_info = json.load(f) 69 | objs = anno_info["objects"] 70 | for obj in objs: 71 | if obj['label'] == 'pedestrian': 72 | bbox = obj['bbox'] 73 | bboxVis = obj['bboxVis'] 74 | if bboxVis[2]*bboxVis[3] * 
1.0 / (bbox[2] * bbox[3]) > 0.4: 75 | bbox = np.array(bbox) 76 | bbox[2:] += bbox[:2] 77 | bboxes.append(bbox.tolist()) 78 | bboxes = np.array(bboxes, dtype=float) 79 | if bboxes.size == 0: 80 | self._detections[img_id] = np.zeros((0, 5)) 81 | else: 82 | self._detections[img_id] = np.hstack((bboxes, np.ones((len(bboxes), 1)))) 83 | 84 | def detections(self, ind): 85 | image_id = self._image_ids[ind] 86 | detections = self._detections[image_id] 87 | return detections.astype(float).copy() 88 | 89 | def _to_float(self, x): 90 | return float("{:.2f}".format(x)) 91 | 92 | def convert_to_dict(self, all_boxes): 93 | scores, bboxes, img_ids, clses = [], [], [], [] 94 | for img_id in all_boxes: 95 | for cls_id in all_boxes[img_id]: 96 | dets = all_boxes[img_id][cls_id] 97 | img_ids.extend([img_id] * len(dets)) 98 | clses.extend([cls_id] * len(dets)) 99 | scores.append(dets[:, -1]) 100 | bboxes.append(dets[:, :-1]) 101 | scores = np.concatenate(scores, axis=0) 102 | bboxes = np.concatenate(bboxes, axis=0) 103 | detections = {"image_ids": img_ids, 104 | "category_ids": clses, 105 | "bboxes": bboxes, 106 | "confidences": scores} 107 | return detections 108 | 109 | 110 | 111 | def evaluate(self, detections, ovthresh=0.5): 112 | image_ids = detections['image_ids'] 113 | bboxes = detections['bboxes'] 114 | confidences = detections["confidences"] 115 | category_ids = detections["category_ids"] # only one class in our results 116 | 117 | # precision and recall 118 | sorted_ind = np.argsort(-confidences) 119 | bboxes = bboxes[sorted_ind, :] 120 | image_ids = [image_ids[x] for x in sorted_ind] 121 | nd = len(sorted_ind) 122 | tp, fp = np.zeros(nd), np.zeros(nd) 123 | 124 | nps = 0 125 | R_dets = {} 126 | for id in image_ids: 127 | if id not in R_dets: 128 | R_dets[id] = np.zeros(len(self._detections[id])) 129 | nps += len(self._detections[id]) 130 | 131 | for d in range(nd): 132 | R = self._detections[image_ids[d]] 133 | R_det = R_dets[image_ids[d]] 134 | bb = bboxes[d, :].astype(float) 135 | ovrmax = -np.inf 136 | BBGT = R[:, :4].astype(float) 137 | 138 | if BBGT.size > 0: 139 | xmin = np.maximum(BBGT[:, 0], bb[0]) 140 | xmax = np.minimum(BBGT[:, 2], bb[2]) 141 | ymin = np.maximum(BBGT[:, 1], bb[1]) 142 | ymax = np.minimum(BBGT[:, 3], bb[3]) 143 | w = np.maximum(xmax - xmin + 1, 0.) 144 | h = np.maximum(ymax - ymin + 1, 0.) 145 | inters = w * h # intersection 146 | unions = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 147 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - 148 | BBGT[:, 1] + 1.) - inters 149 | 150 | ious = inters / unions 151 | ovrmax = np.max(ious) 152 | jmax = np.argmax(ious) 153 | if ovrmax > ovthresh: 154 | if R_det[jmax] == 0: 155 | tp[d] = 1 156 | R_det[jmax] = 1 157 | else: 158 | fp[d] = 1 159 | else: 160 | fp[d] = 1 161 | fp = np.cumsum(fp) 162 | tp = np.cumsum(tp) 163 | rec = tp/float(nps) 164 | pre = tp/np.maximum(tp + fp, np.finfo(np.float64).eps) 165 | 166 | def voc_ap(rec, pre, use_07_metric=False): 167 | """Compute VOC AP given precision and recall. 168 | If use_07_metric is true, uses the VOC 07 11-point method (default: False)""" 169 | if use_07_metric: 170 | ap = 0. 171 | for t in np.arange(0., 1.1, 0.1): 172 | if np.sum(rec >= t) == 0: 173 | p = 0 174 | else: 175 | p = np.max(pre[rec >= t]) 176 | ap = ap + p / 11.
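# The 11-point branch above averages the best precision found at the
# recall thresholds 0.0, 0.1, ..., 1.0 (the VOC 2007 approximation).
# The branch below instead computes the exact area under the
# precision-recall curve: it makes the precision envelope
# monotonically non-increasing, then sums it over every recall change.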
177 | else: 178 | # first append sentinel values at the end 179 | mrec = np.concatenate(([0.], rec, [1.])) 180 | mpre = np.concatenate(([0.], pre, [0.])) 181 | # compute the precision, 182 | for i in range(mpre.size - 1, 0, -1): 183 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 184 | i = np.where(mrec[1:] != mrec[:-1])[0] 185 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 186 | return ap 187 | print("The final evaluated AP: {}".format(voc_ap(rec, pre))) 188 | 189 | 190 | if __name__=='__main__': 191 | import cv2 192 | os.chdir('../') 193 | 194 | cfg_file = os.path.join(system_configs.config_dir, 'CenterNet-52.json') 195 | with open(cfg_file, 'r') as f: 196 | configs = json.load(f) 197 | 198 | configs['system']['snapshot_name'] = 'CenterNet-52' 199 | system_configs.update_config(configs['system']) 200 | 201 | val_split = system_configs.val_split 202 | val_db = CityPerson(configs['db'], val_split) 203 | 204 | ind = 1 205 | img_file = val_db.image_file(ind) 206 | detections = val_db.detections(ind) 207 | img = cv2.imread(img_file) 208 | 209 | for d in detections: 210 | cv2.rectangle(img, (int(d[0]), int(d[1])), (int(d[2]), int(d[3])), color=(0, 0, 255)) 211 | 212 | 213 | cv2.imshow('test', img) 214 | cv2.waitKey(0) -------------------------------------------------------------------------------- /db/datasets.py: -------------------------------------------------------------------------------- 1 | from db.cityperson import CityPerson 2 | 3 | datasets = { 4 | "CityPerson": CityPerson 5 | # "MCOCO": MCOCO 6 | } -------------------------------------------------------------------------------- /db/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from db.base import BASE 3 | 4 | 5 | class DETECTION(BASE): 6 | def __init__(self, db_config): 7 | super(DETECTION, self).__init__() 8 | 9 | self._configs["categories"] = 2 10 | self._configs["kp_categories"] = 1 11 | self._configs["rand_scales"] = [1] 12 | self._configs["rand_scale_min"] = 0.8 13 | self._configs["rand_scale_max"] = 1.4 14 | self._configs["rand_scale_step"] = 0.2 15 | 16 | self._configs["input_size"] = [511] 17 | self._configs["output_sizes"] = [[128, 128]] 18 | 19 | self._configs["nms_threshold"] = 0.5 20 | self._configs["max_per_image"] = 100 21 | self._configs["top_k"] = 100 22 | self._configs["ae_threshold"] = 0.5 23 | self._configs["nms_kernel"] = 3 24 | 25 | self._configs["nms_algorithm"] = "exp_soft_nms" 26 | self._configs["weight_exp"] = 8 27 | self._configs["merge_bbox"] = False 28 | 29 | self._configs["data_aug"] = True 30 | self._configs["lighting"] = True 31 | 32 | self._configs["border"] = 128 33 | self._configs["gaussian_bump"] = True 34 | self._configs["gaussian_iou"] = 0.7 35 | self._configs["gaussian_radius"] = -1 36 | self._configs["rand_crop"] = False 37 | self._configs["rand_color"] = False 38 | self._configs["rand_pushes"] = False 39 | self._configs["rand_samples"] = False 40 | self._configs["special_crop"] = False 41 | 42 | self._configs["test_scales"] = [1] 43 | 44 | self._train_cfg["rcnn"] = dict( 45 | assigner=dict( 46 | pos_iou_thr=0.5, 47 | neg_iou_thr=0.5, 48 | min_pos_iou=0.5, 49 | ignore_iof_thr=-1), 50 | sampler=dict( 51 | num=512, 52 | pos_fraction=0.25, 53 | neg_pos_ub=-1, 54 | add_gt_as_proposals=True, 55 | pos_balance_sampling=False, 56 | neg_balance_thr=0), 57 | mask_size=28, 58 | pos_weight=-1, 59 | debug=False) 60 | 61 | self._model['bbox_roi_extractor'] = dict( 62 | type='SingleRoIExtractor', 63 | 
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 64 | out_channels=256, 65 | featmap_strides=[4]) 66 | 67 | self._model['bbox_head'] = dict( 68 | type='SharedFCBBoxHead', 69 | num_fcs=2, 70 | in_channels=256, 71 | fc_out_channels=1024, 72 | roi_feat_size=7, 73 | num_classes=81, 74 | target_means=[0., 0., 0., 0.], 75 | target_stds=[0.1, 0.1, 0.2, 0.2], 76 | reg_class_agnostic=False) 77 | 78 | self.update_config(db_config) 79 | 80 | if self._configs["rand_scales"] is None: 81 | self._configs["rand_scales"] = np.arange( 82 | self._configs["rand_scale_min"], 83 | self._configs["rand_scale_max"], 84 | self._configs["rand_scale_step"] 85 | ) 86 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import cv2 4 | import time 5 | from tqdm import tqdm 6 | from test.vis_utils import draw_bboxes 7 | from test.detector import CenterNet as Detector 8 | 9 | os.environ['CUDA_VISIBLE_DEVICES']='2' 10 | detector = Detector("CenterNet-52", iter=10000) 11 | t0 = time.time() 12 | image_names = [img for img in os.listdir('data/demo') if img[-3:]=='jpg'] 13 | for i in tqdm(range(len(image_names))): 14 | image = cv2.imread('data/demo/{}'.format(image_names[i])) 15 | bboxes = detector(image) 16 | image = draw_bboxes(image, bboxes) 17 | cv2.imwrite("tmp_squeeze/{}.jpg".format(str(i).zfill(6)), image) 18 | cv2.imshow('image', image) 19 | cv2.waitKey(10) 20 | 21 | t1 = time.time() 22 | print("speed: %f s"%((t1-t0)/len(image_names)))  # average seconds per image 23 | -------------------------------------------------------------------------------- /external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/__init__.py -------------------------------------------------------------------------------- /external/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # ---------------------------------------------------------- 9 | # Soft-NMS: Improving Object Detection With One Line of Code 10 | # Copyright (c) University of Maryland, College Park 11 | # Licensed under The MIT License [see LICENSE for details] 12 | # Written by Navaneeth Bodla and Bharat Singh 13 | # ---------------------------------------------------------- 14 | 15 | import numpy as np 16 | cimport numpy as np 17 | 18 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 19 | return a if a >= b else b 20 | 21 | cdef inline np.float32_t 
min(np.float32_t a, np.float32_t b): 22 | return a if a <= b else b 23 | 24 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 25 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 26 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 27 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 28 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 29 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 30 | 31 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 32 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 33 | 34 | cdef int ndets = dets.shape[0] 35 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 36 | np.zeros((ndets), dtype=np.int) 37 | 38 | # nominal indices 39 | cdef int _i, _j 40 | # sorted indices 41 | cdef int i, j 42 | # temp variables for box i's (the box currently under consideration) 43 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 44 | # variables for computing overlap with box j (lower scoring box) 45 | cdef np.float32_t xx1, yy1, xx2, yy2 46 | cdef np.float32_t w, h 47 | cdef np.float32_t inter, ovr 48 | 49 | keep = [] 50 | for _i in range(ndets): 51 | i = order[_i] 52 | if suppressed[i] == 1: 53 | continue 54 | keep.append(i) 55 | ix1 = x1[i] 56 | iy1 = y1[i] 57 | ix2 = x2[i] 58 | iy2 = y2[i] 59 | iarea = areas[i] 60 | for _j in range(_i + 1, ndets): 61 | j = order[_j] 62 | if suppressed[j] == 1: 63 | continue 64 | xx1 = max(ix1, x1[j]) 65 | yy1 = max(iy1, y1[j]) 66 | xx2 = min(ix2, x2[j]) 67 | yy2 = min(iy2, y2[j]) 68 | w = max(0.0, xx2 - xx1 + 1) 69 | h = max(0.0, yy2 - yy1 + 1) 70 | inter = w * h 71 | ovr = inter / (iarea + areas[j] - inter) 72 | if ovr >= thresh: 73 | suppressed[j] = 1 74 | 75 | return keep 76 | 77 | def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 78 | cdef unsigned int N = boxes.shape[0] 79 | cdef float iw, ih, box_area 80 | cdef float ua 81 | cdef int pos = 0 82 | cdef float maxscore = 0 83 | cdef int maxpos = 0 84 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 85 | 86 | for i in range(N): 87 | maxscore = boxes[i, 4] 88 | maxpos = i 89 | 90 | tx1 = boxes[i,0] 91 | ty1 = boxes[i,1] 92 | tx2 = boxes[i,2] 93 | ty2 = boxes[i,3] 94 | ts = boxes[i,4] 95 | 96 | pos = i + 1 97 | # get max box 98 | while pos < N: 99 | if maxscore < boxes[pos, 4]: 100 | maxscore = boxes[pos, 4] 101 | maxpos = pos 102 | pos = pos + 1 103 | 104 | # add max box as a detection 105 | boxes[i,0] = boxes[maxpos,0] 106 | boxes[i,1] = boxes[maxpos,1] 107 | boxes[i,2] = boxes[maxpos,2] 108 | boxes[i,3] = boxes[maxpos,3] 109 | boxes[i,4] = boxes[maxpos,4] 110 | 111 | # swap ith box with position of max box 112 | boxes[maxpos,0] = tx1 113 | boxes[maxpos,1] = ty1 114 | boxes[maxpos,2] = tx2 115 | boxes[maxpos,3] = ty2 116 | boxes[maxpos,4] = ts 117 | 118 | tx1 = boxes[i,0] 119 | ty1 = boxes[i,1] 120 | tx2 = boxes[i,2] 121 | ty2 = boxes[i,3] 122 | ts = boxes[i,4] 123 | 124 | pos = i + 1 125 | # NMS iterations, note that N changes if detection boxes fall below threshold 126 | while pos < N: 127 | x1 = boxes[pos, 0] 128 | y1 = boxes[pos, 1] 129 | x2 = boxes[pos, 2] 130 | y2 = boxes[pos, 3] 131 | s = boxes[pos, 4] 132 | 133 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 134 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 135 | if iw > 0: 136 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 137 | if ih > 0: 138 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 139 | ov = iw * ih / ua #iou between max box and detection box 140 | 141 
| if method == 1: # linear 142 | if ov > Nt: 143 | weight = 1 - ov 144 | else: 145 | weight = 1 146 | elif method == 2: # gaussian 147 | weight = np.exp(-(ov * ov)/sigma) 148 | else: # original NMS 149 | if ov > Nt: 150 | weight = 0 151 | else: 152 | weight = 1 153 | 154 | boxes[pos, 4] = weight*boxes[pos, 4] 155 | 156 | # if box score falls below threshold, discard the box by swapping with last box 157 | # update N 158 | if boxes[pos, 4] < threshold: 159 | boxes[pos,0] = boxes[N-1, 0] 160 | boxes[pos,1] = boxes[N-1, 1] 161 | boxes[pos,2] = boxes[N-1, 2] 162 | boxes[pos,3] = boxes[N-1, 3] 163 | boxes[pos,4] = boxes[N-1, 4] 164 | N = N - 1 165 | pos = pos - 1 166 | 167 | pos = pos + 1 168 | 169 | keep = [i for i in range(N)] 170 | return keep 171 | 172 | def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6): 173 | cdef unsigned int N = boxes.shape[0] 174 | cdef float iw, ih, box_area 175 | cdef float ua 176 | cdef int pos = 0 177 | cdef float maxscore = 0 178 | cdef int maxpos = 0 179 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 180 | cdef float mx1,mx2,my1,my2,mts,mbs,mw 181 | 182 | for i in range(N): 183 | maxscore = boxes[i, 4] 184 | maxpos = i 185 | 186 | tx1 = boxes[i,0] 187 | ty1 = boxes[i,1] 188 | tx2 = boxes[i,2] 189 | ty2 = boxes[i,3] 190 | ts = boxes[i,4] 191 | 192 | pos = i + 1 193 | # get max box 194 | while pos < N: 195 | if maxscore < boxes[pos, 4]: 196 | maxscore = boxes[pos, 4] 197 | maxpos = pos 198 | pos = pos + 1 199 | 200 | # add max box as a detection 201 | boxes[i,0] = boxes[maxpos,0] 202 | boxes[i,1] = boxes[maxpos,1] 203 | boxes[i,2] = boxes[maxpos,2] 204 | boxes[i,3] = boxes[maxpos,3] 205 | boxes[i,4] = boxes[maxpos,4] 206 | 207 | mx1 = boxes[i, 0] * boxes[i, 5] 208 | my1 = boxes[i, 1] * boxes[i, 5] 209 | mx2 = boxes[i, 2] * boxes[i, 6] 210 | my2 = boxes[i, 3] * boxes[i, 6] 211 | mts = boxes[i, 5] 212 | mbs = boxes[i, 6] 213 | 214 | # swap ith box with position of max box 215 | boxes[maxpos,0] = tx1 216 | boxes[maxpos,1] = ty1 217 | boxes[maxpos,2] = tx2 218 | boxes[maxpos,3] = ty2 219 | boxes[maxpos,4] = ts 220 | 221 | tx1 = boxes[i,0] 222 | ty1 = boxes[i,1] 223 | tx2 = boxes[i,2] 224 | ty2 = boxes[i,3] 225 | ts = boxes[i,4] 226 | 227 | pos = i + 1 228 | # NMS iterations, note that N changes if detection boxes fall below threshold 229 | while pos < N: 230 | x1 = boxes[pos, 0] 231 | y1 = boxes[pos, 1] 232 | x2 = boxes[pos, 2] 233 | y2 = boxes[pos, 3] 234 | s = boxes[pos, 4] 235 | 236 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 237 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 238 | if iw > 0: 239 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 240 | if ih > 0: 241 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 242 | ov = iw * ih / ua #iou between max box and detection box 243 | 244 | if method == 1: # linear 245 | if ov > Nt: 246 | weight = 1 - ov 247 | else: 248 | weight = 1 249 | elif method == 2: # gaussian 250 | weight = np.exp(-(ov * ov)/sigma) 251 | else: # original NMS 252 | if ov > Nt: 253 | weight = 0 254 | else: 255 | weight = 1 256 | 257 | mw = (1 - weight) ** weight_exp 258 | mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw 259 | my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw 260 | mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw 261 | my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw 262 | mts = mts + boxes[pos, 5] * mw 263 | mbs = mbs + boxes[pos, 6] * mw 264 | 265 | boxes[pos, 4] = weight*boxes[pos, 4] 266 | 267 | # if box score falls below 
threshold, discard the box by swapping with last box 268 | # update N 269 | if boxes[pos, 4] < threshold: 270 | boxes[pos,0] = boxes[N-1, 0] 271 | boxes[pos,1] = boxes[N-1, 1] 272 | boxes[pos,2] = boxes[N-1, 2] 273 | boxes[pos,3] = boxes[N-1, 3] 274 | boxes[pos,4] = boxes[N-1, 4] 275 | N = N - 1 276 | pos = pos - 1 277 | 278 | pos = pos + 1 279 | 280 | boxes[i, 0] = mx1 / mts 281 | boxes[i, 1] = my1 / mts 282 | boxes[i, 2] = mx2 / mbs 283 | boxes[i, 3] = my2 / mbs 284 | 285 | keep = [i for i in range(N)] 286 | return keep 287 | -------------------------------------------------------------------------------- /external/nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/external/nms.so -------------------------------------------------------------------------------- /external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /models/CenterNet-104.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual 5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 6 | 7 | class pool(nn.Module): 8 | def __init__(self, dim, pool1, pool2): 9 | super(pool, self).__init__() 10 | self.p1_conv1 = convolution(3, dim, 128) 11 | self.p2_conv1 = convolution(3, dim, 128) 12 | 13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 14 | self.p_bn1 = nn.BatchNorm2d(dim) 15 | 16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 17 | self.bn1 = nn.BatchNorm2d(dim) 18 | self.relu1 = nn.ReLU(inplace=True) 19 | 20 | self.conv2 = convolution(3, dim, dim) 21 | 22 | self.pool1 = pool1() 23 | self.pool2 = pool2() 24 | 25 | self.look_conv1 = convolution(3, dim, 128) 26 | self.look_conv2 = convolution(3, dim, 128) 27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 29 | 30 | def forward(self, x): 31 | # pool 1 32 | look_conv1 = self.look_conv1(x) 33 | p1_conv1 = self.p1_conv1(x) 34 | look_right = self.pool2(look_conv1) 35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right) 36 | pool1 = self.pool1(P1_look_conv) 37 | 38 | # pool 2 39 | look_conv2 = self.look_conv2(x) 40 | p2_conv1 = self.p2_conv1(x) 41 | look_down = self.pool1(look_conv2) 42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down) 43 | pool2 = self.pool2(P2_look_conv) 44 | 45 | # pool 1 + pool 2 46 | p_conv1 = self.p_conv1(pool1 + pool2) 47 | p_bn1 = self.p_bn1(p_conv1) 48 | 49 | conv1 = self.conv1(x) 50 | bn1 = self.bn1(conv1) 51 | relu1 = self.relu1(p_bn1 + bn1) 52 | 53 | conv2 = self.conv2(relu1) 54 | return conv2 55 | 56 | class pool_cross(nn.Module): 57 | def __init__(self, dim, pool1, pool2, pool3, pool4): 58 | super(pool_cross, self).__init__() 59 | self.p1_conv1 = convolution(3, dim, 128) 60 | 
self.p2_conv1 = convolution(3, dim, 128) 61 | 62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 63 | self.p_bn1 = nn.BatchNorm2d(dim) 64 | 65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 66 | self.bn1 = nn.BatchNorm2d(dim) 67 | self.relu1 = nn.ReLU(inplace=True) 68 | 69 | self.conv2 = convolution(3, dim, dim) 70 | 71 | self.pool1 = pool1() 72 | self.pool2 = pool2() 73 | self.pool3 = pool3() 74 | self.pool4 = pool4() 75 | 76 | def forward(self, x): 77 | # pool 1 78 | p1_conv1 = self.p1_conv1(x) 79 | pool1 = self.pool1(p1_conv1) 80 | pool1 = self.pool3(pool1) 81 | 82 | # pool 2 83 | p2_conv1 = self.p2_conv1(x) 84 | pool2 = self.pool2(p2_conv1) 85 | pool2 = self.pool4(pool2) 86 | 87 | # pool 1 + pool 2 88 | p_conv1 = self.p_conv1(pool1 + pool2) 89 | p_bn1 = self.p_bn1(p_conv1) 90 | 91 | conv1 = self.conv1(x) 92 | bn1 = self.bn1(conv1) 93 | relu1 = self.relu1(p_bn1 + bn1) 94 | 95 | conv2 = self.conv2(relu1) 96 | return conv2 97 | 98 | class tl_pool(pool): 99 | def __init__(self, dim): 100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool) 101 | 102 | class br_pool(pool): 103 | def __init__(self, dim): 104 | super(br_pool, self).__init__(dim, BottomPool, RightPool) 105 | 106 | class center_pool(pool_cross): 107 | def __init__(self, dim): 108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool) 109 | 110 | def make_tl_layer(dim): 111 | return tl_pool(dim) 112 | 113 | def make_br_layer(dim): 114 | return br_pool(dim) 115 | 116 | def make_ct_layer(dim): 117 | return center_pool(dim) 118 | 119 | def make_pool_layer(dim): 120 | return nn.Sequential() 121 | 122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 123 | layers = [layer(kernel, dim0, dim1, stride=2)] 124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 125 | return nn.Sequential(*layers) 126 | 127 | class model(kp): 128 | def __init__(self, db): 129 | n = 5 130 | dims = [256, 256, 384, 384, 384, 512] 131 | modules = [2, 2, 2, 2, 2, 4] 132 | out_dim = 1 133 | 134 | super(model, self).__init__( 135 | db, n, 2, dims, modules, out_dim, 136 | make_tl_layer=make_tl_layer, 137 | make_br_layer=make_br_layer, 138 | make_ct_layer=make_ct_layer, 139 | make_pool_layer=make_pool_layer, 140 | make_hg_layer=make_hg_layer, 141 | kp_layer=residual, cnv_dim=256 142 | ) 143 | 144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss) 145 | -------------------------------------------------------------------------------- /models/CenterNet-52.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .py_utils import kp, AELoss, _neg_loss, convolution, residual 5 | from .py_utils import TopPool, BottomPool, LeftPool, RightPool 6 | 7 | class pool(nn.Module): 8 | def __init__(self, dim, pool1, pool2): 9 | super(pool, self).__init__() 10 | self.p1_conv1 = convolution(3, dim, 128) 11 | self.p2_conv1 = convolution(3, dim, 128) 12 | 13 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 14 | self.p_bn1 = nn.BatchNorm2d(dim) 15 | 16 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 17 | self.bn1 = nn.BatchNorm2d(dim) 18 | self.relu1 = nn.ReLU(inplace=True) 19 | 20 | self.conv2 = convolution(3, dim, dim) 21 | 22 | self.pool1 = pool1() 23 | self.pool2 = pool2() 24 | 25 | self.look_conv1 = convolution(3, dim, 128) 26 | self.look_conv2 = convolution(3, dim, 128) 27 | self.P1_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), 
bias=False) 28 | self.P2_look_conv = nn.Conv2d(128, 128, (3, 3), padding=(1, 1), bias=False) 29 | 30 | def forward(self, x): 31 | # pool 1 32 | look_conv1 = self.look_conv1(x) 33 | p1_conv1 = self.p1_conv1(x) 34 | look_right = self.pool2(look_conv1) 35 | P1_look_conv = self.P1_look_conv(p1_conv1+look_right) 36 | pool1 = self.pool1(P1_look_conv) 37 | 38 | # pool 2 39 | look_conv2 = self.look_conv2(x) 40 | p2_conv1 = self.p2_conv1(x) 41 | look_down = self.pool1(look_conv2) 42 | P2_look_conv = self.P2_look_conv(p2_conv1+look_down) 43 | pool2 = self.pool2(P2_look_conv) 44 | 45 | # pool 1 + pool 2 46 | p_conv1 = self.p_conv1(pool1 + pool2) 47 | p_bn1 = self.p_bn1(p_conv1) 48 | 49 | conv1 = self.conv1(x) 50 | bn1 = self.bn1(conv1) 51 | relu1 = self.relu1(p_bn1 + bn1) 52 | 53 | conv2 = self.conv2(relu1) 54 | return conv2 55 | 56 | class pool_cross(nn.Module): 57 | def __init__(self, dim, pool1, pool2, pool3, pool4): 58 | super(pool_cross, self).__init__() 59 | self.p1_conv1 = convolution(3, dim, 128) 60 | self.p2_conv1 = convolution(3, dim, 128) 61 | 62 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 63 | self.p_bn1 = nn.BatchNorm2d(dim) 64 | 65 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 66 | self.bn1 = nn.BatchNorm2d(dim) 67 | self.relu1 = nn.ReLU(inplace=True) 68 | 69 | self.conv2 = convolution(3, dim, dim) 70 | 71 | self.pool1 = pool1() 72 | self.pool2 = pool2() 73 | self.pool3 = pool3() 74 | self.pool4 = pool4() 75 | 76 | def forward(self, x): 77 | # pool 1 78 | p1_conv1 = self.p1_conv1(x) 79 | pool1 = self.pool1(p1_conv1) 80 | pool1 = self.pool3(pool1) 81 | 82 | # pool 2 83 | p2_conv1 = self.p2_conv1(x) 84 | pool2 = self.pool2(p2_conv1) 85 | pool2 = self.pool4(pool2) 86 | 87 | # pool 1 + pool 2 88 | p_conv1 = self.p_conv1(pool1 + pool2) 89 | p_bn1 = self.p_bn1(p_conv1) 90 | 91 | conv1 = self.conv1(x) 92 | bn1 = self.bn1(conv1) 93 | relu1 = self.relu1(p_bn1 + bn1) 94 | 95 | conv2 = self.conv2(relu1) 96 | return conv2 97 | 98 | class tl_pool(pool): 99 | def __init__(self, dim): 100 | super(tl_pool, self).__init__(dim, TopPool, LeftPool) 101 | 102 | class br_pool(pool): 103 | def __init__(self, dim): 104 | super(br_pool, self).__init__(dim, BottomPool, RightPool) 105 | 106 | class center_pool(pool_cross): 107 | def __init__(self, dim): 108 | super(center_pool, self).__init__(dim, TopPool, LeftPool, BottomPool, RightPool) 109 | 110 | def make_tl_layer(dim): 111 | return tl_pool(dim) 112 | 113 | def make_br_layer(dim): 114 | return br_pool(dim) 115 | 116 | def make_ct_layer(dim): 117 | return center_pool(dim) 118 | 119 | def make_pool_layer(dim): 120 | return nn.Sequential() 121 | 122 | def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs): 123 | layers = [layer(kernel, dim0, dim1, stride=2)] 124 | layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)] 125 | return nn.Sequential(*layers) 126 | 127 | class model(kp): 128 | def __init__(self, db): 129 | n = 5 130 | dims = [256, 256, 384, 384, 384, 512] 131 | modules = [2, 2, 2, 2, 2, 4] 132 | out_dim = 1 # category num 133 | 134 | super(model, self).__init__( 135 | db, n, 1, dims, modules, out_dim, 136 | make_tl_layer=make_tl_layer, 137 | make_br_layer=make_br_layer, 138 | make_ct_layer=make_ct_layer, 139 | make_pool_layer=make_pool_layer, 140 | make_hg_layer=make_hg_layer, 141 | kp_layer=residual, cnv_dim=256 142 | ) 143 | 144 | loss = AELoss(pull_weight=1e-1, push_weight=1e-1, focal_loss=_neg_loss) 145 | -------------------------------------------------------------------------------- 
/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/models/__init__.py -------------------------------------------------------------------------------- /models/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .kp import kp, AELoss 2 | from .kp_utils import _neg_loss 3 | 4 | from .utils import convolution, fully_connected, residual 5 | 6 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 7 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | import sys 6 | import os 7 | sys.path.append(os.path.join(os.path.dirname(__file__),'dist/cpools-0.0.0-py3.6-linux-x86_64.egg')) 8 | import top_pool, bottom_pool, left_pool, right_pool 9 | 10 | class TopPoolFunction(Function): 11 | @staticmethod 12 | def forward(ctx, input): 13 | output = top_pool.forward(input)[0] 14 | ctx.save_for_backward(input) 15 | return output 16 | 17 | @staticmethod 18 | def backward(ctx, grad_output): 19 | input = ctx.saved_variables[0] 20 | output = top_pool.backward(input, grad_output)[0] 21 | return output 22 | 23 | class BottomPoolFunction(Function): 24 | @staticmethod 25 | def forward(ctx, input): 26 | output = bottom_pool.forward(input)[0] 27 | ctx.save_for_backward(input) 28 | return output 29 | 30 | @staticmethod 31 | def backward(ctx, grad_output): 32 | input = ctx.saved_variables[0] 33 | output = bottom_pool.backward(input, grad_output)[0] 34 | return output 35 | 36 | class LeftPoolFunction(Function): 37 | @staticmethod 38 | def forward(ctx, input): 39 | output = left_pool.forward(input)[0] 40 | ctx.save_for_backward(input) 41 | return output 42 | 43 | @staticmethod 44 | def backward(ctx, grad_output): 45 | input = ctx.saved_variables[0] 46 | output = left_pool.backward(input, grad_output)[0] 47 | return output 48 | 49 | class RightPoolFunction(Function): 50 | @staticmethod 51 | def forward(ctx, input): 52 | output = right_pool.forward(input)[0] 53 | ctx.save_for_backward(input) 54 | return output 55 | 56 | @staticmethod 57 | def backward(ctx, grad_output): 58 | input = ctx.saved_variables[0] 59 | output = right_pool.backward(input, grad_output)[0] 60 | return output 61 | 62 | class TopPool(nn.Module): 63 | def forward(self, x): 64 | return TopPoolFunction.apply(x) 65 | 66 | class BottomPool(nn.Module): 67 | def forward(self, x): 68 | return BottomPoolFunction.apply(x) 69 | 70 | class LeftPool(nn.Module): 71 | def forward(self, x): 72 | return LeftPoolFunction.apply(x) 73 | 74 | class RightPool(nn.Module): 75 | def forward(self, x): 76 | return RightPoolFunction.apply(x) 77 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | 
ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | // Copy the first row 15 | at::Tensor input_temp = input.select(2, 0); 16 | at::Tensor output_temp = output.select(2, 0); 17 | output_temp.copy_(input_temp); 18 | 19 | at::Tensor max_temp; 20 | for (int64_t ind = 0; ind < height - 1; ++ind) { 21 | input_temp = input.select(2, ind + 1); 22 | output_temp = output.select(2, ind); 23 | max_temp = output.select(2, ind + 1); 24 | 25 | at::max_out(max_temp, input_temp, output_temp); 26 | } 27 | 28 | return { 29 | output 30 | }; 31 | } 32 | 33 | std::vector<at::Tensor> pool_backward( 34 | at::Tensor input, 35 | at::Tensor grad_output 36 | ) { 37 | auto output = at::zeros_like(input); 38 | 39 | int32_t batch = input.size(0); 40 | int32_t channel = input.size(1); 41 | int32_t height = input.size(2); 42 | int32_t width = input.size(3); 43 | 44 | auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width}); 45 | auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width}); 46 | 47 | auto input_temp = input.select(2, 0); 48 | max_val.copy_(input_temp); 49 | 50 | max_ind.fill_(0); 51 | 52 | auto output_temp = output.select(2, 0); 53 | auto grad_output_temp = grad_output.select(2, 0); 54 | output_temp.copy_(grad_output_temp); 55 | 56 | auto un_max_ind = max_ind.unsqueeze(2); 57 | auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width}); 58 | auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width}); 59 | for (int32_t ind = 0; ind < height - 1; ++ind) { 60 | input_temp = input.select(2, ind + 1); 61 | at::gt_out(gt_mask, input_temp, max_val); 62 | 63 | at::masked_select_out(max_temp, input_temp, gt_mask); 64 | max_val.masked_scatter_(gt_mask, max_temp); 65 | max_ind.masked_fill_(gt_mask, ind + 1); 66 | 67 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 68 | output.scatter_add_(2, un_max_ind, grad_output_temp); 69 | } 70 | 71 | return { 72 | output 73 | }; 74 | } 75 | 76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 77 | m.def( 78 | "forward", &pool_forward, "Bottom Pool Forward", 79 | py::call_guard<py::gil_scoped_release>() 80 | ); 81 | m.def( 82 | "backward", &pool_backward, "Bottom Pool Backward", 83 | py::call_guard<py::gil_scoped_release>() 84 | ); 85 | } 86 | -------------------------------------------------------------------------------- /models/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | // Copy the last column 15 | at::Tensor input_temp = input.select(3, width - 1); 16 | at::Tensor output_temp = output.select(3, width - 1); 17 | output_temp.copy_(input_temp); 18 | 19 | at::Tensor 
20 |     for (int64_t ind = 1; ind < width; ++ind) {
21 |         input_temp = input.select(3, width - ind - 1);
22 |         output_temp = output.select(3, width - ind);
23 |         max_temp = output.select(3, width - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, width - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(width - 1);
51 | 
52 |     auto output_temp = output.select(3, width - 1);
53 |     auto grad_output_temp = grad_output.select(3, width - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 1; ind < width; ++ind) {
60 |         input_temp = input.select(3, width - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, width - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Left Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Left Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/_cpools/src/right_pool.cpp: --------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get width
12 |     int64_t width = input.size(3);
13 | 
14 |     // Copy the first column (the scan runs left-to-right along the width axis)
15 |     at::Tensor input_temp = input.select(3, 0);
16 |     at::Tensor output_temp = output.select(3, 0);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 0; ind < width - 1; ++ind) {
21 |         input_temp = input.select(3, ind + 1);
22 |         output_temp = output.select(3, ind);
23 |         max_temp = output.select(3, ind + 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     at::Tensor output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, height});
46 | 
47 |     auto input_temp = input.select(3, 0);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(0);
51 | 
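    // NOTE: the backward pass recomputes the forward argmax on the fly:
    // max_val / max_ind hold, per (batch, channel, row), the running maximum
    // and the column where it was attained; each grad_output column is then
    // scatter_add-ed into `output` at its current argmax column, so gradients
    // flow only to the input positions that produced the pooled maxima.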
52 |     auto output_temp = output.select(3, 0);
53 |     auto grad_output_temp = grad_output.select(3, 0);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(3);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, height});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, height});
59 |     for (int32_t ind = 0; ind < width - 1; ++ind) {
60 |         input_temp = input.select(3, ind + 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, ind + 1);
66 | 
67 |         grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
68 |         output.scatter_add_(3, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &pool_forward, "Right Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &pool_backward, "Right Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/_cpools/src/top_pool.cpp: --------------------------------------------------------------------------------
1 | #include <torch/torch.h>
2 | 
3 | #include <vector>
4 | 
5 | std::vector<at::Tensor> top_pool_forward(
6 |     at::Tensor input
7 | ) {
8 |     // Initialize output
9 |     at::Tensor output = at::zeros_like(input);
10 | 
11 |     // Get height
12 |     int64_t height = input.size(2);
13 | 
14 |     // Copy the last row (the scan runs bottom-to-top along the height axis)
15 |     at::Tensor input_temp = input.select(2, height - 1);
16 |     at::Tensor output_temp = output.select(2, height - 1);
17 |     output_temp.copy_(input_temp);
18 | 
19 |     at::Tensor max_temp;
20 |     for (int64_t ind = 1; ind < height; ++ind) {
21 |         input_temp = input.select(2, height - ind - 1);
22 |         output_temp = output.select(2, height - ind);
23 |         max_temp = output.select(2, height - ind - 1);
24 | 
25 |         at::max_out(max_temp, input_temp, output_temp);
26 |     }
27 | 
28 |     return {
29 |         output
30 |     };
31 | }
32 | 
33 | std::vector<at::Tensor> top_pool_backward(
34 |     at::Tensor input,
35 |     at::Tensor grad_output
36 | ) {
37 |     auto output = at::zeros_like(input);
38 | 
39 |     int32_t batch = input.size(0);
40 |     int32_t channel = input.size(1);
41 |     int32_t height = input.size(2);
42 |     int32_t width = input.size(3);
43 | 
44 |     auto max_val = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
45 |     auto max_ind = at::zeros(torch::CUDA(at::kLong), {batch, channel, width});
46 | 
47 |     auto input_temp = input.select(2, height - 1);
48 |     max_val.copy_(input_temp);
49 | 
50 |     max_ind.fill_(height - 1);
51 | 
52 |     auto output_temp = output.select(2, height - 1);
53 |     auto grad_output_temp = grad_output.select(2, height - 1);
54 |     output_temp.copy_(grad_output_temp);
55 | 
56 |     auto un_max_ind = max_ind.unsqueeze(2);
57 |     auto gt_mask = at::zeros(torch::CUDA(at::kByte), {batch, channel, width});
58 |     auto max_temp = at::zeros(torch::CUDA(at::kFloat), {batch, channel, width});
59 |     for (int32_t ind = 1; ind < height; ++ind) {
60 |         input_temp = input.select(2, height - ind - 1);
61 |         at::gt_out(gt_mask, input_temp, max_val);
62 | 
63 |         at::masked_select_out(max_temp, input_temp, gt_mask);
64 |         max_val.masked_scatter_(gt_mask, max_temp);
65 |         max_ind.masked_fill_(gt_mask, height - ind - 1);
66 | 
67 |         grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
68 |         output.scatter_add_(2, un_max_ind, grad_output_temp);
69 |     }
70 | 
71 |     return {
72 |         output
73 |     };
74 | }
75 | 
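// Summary of the four kernels in this directory: each is a directional
// running maximum over the feature map,
//   top pool:    output[i] = max(input[i..H-1])   (scans bottom-to-top)
//   bottom pool: output[i] = max(input[0..i])     (scans top-to-bottom)
//   left pool:   output[j] = max(input[j..W-1])   (scans right-to-left)
//   right pool:  output[j] = max(input[0..j])     (scans left-to-right)
// The corner heads combine them (e.g. top + left pooling for the top-left
// corner) so every location can see object evidence beyond itself.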
76 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
77 |     m.def(
78 |         "forward", &top_pool_forward, "Top Pool Forward",
79 |         py::call_guard<py::gil_scoped_release>()
80 |     );
81 |     m.def(
82 |         "backward", &top_pool_backward, "Top Pool Backward",
83 |         py::call_guard<py::gil_scoped_release>()
84 |     );
85 | }
86 | 
-------------------------------------------------------------------------------- /models/py_utils/data_parallel.py: --------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 | 
7 | from .scatter_gather import scatter_kwargs
8 | 
9 | class DataParallel(Module):
10 |     r"""Implements data parallelism at the module level.
11 | 
12 |     This container parallelizes the application of the given module by
13 |     splitting the input across the specified devices by chunking in the batch
14 |     dimension. In the forward pass, the module is replicated on each device,
15 |     and each replica handles a portion of the input. During the backwards
16 |     pass, gradients from each replica are summed into the original module.
17 | 
18 |     The batch size should be larger than the number of GPUs used. It should
19 |     also be an integer multiple of the number of GPUs so that each chunk is the
20 |     same size (so that each GPU processes the same number of samples).
21 | 
22 |     See also: :ref:`cuda-nn-dataparallel-instead`
23 | 
24 |     Arbitrary positional and keyword inputs are allowed to be passed into
25 |     DataParallel EXCEPT Tensors. All variables will be scattered on dim
26 |     specified (default 0). Primitive types will be broadcasted, but all
27 |     other types will be a shallow copy and can be corrupted if written to in
28 |     the model's forward pass.
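    Unlike the stock ``torch.nn.DataParallel``, this variant also accepts an
    optional ``chunk_sizes`` argument (forwarded to ``scatter_kwargs`` in
    ``scatter_gather.py``), so the batch can be split unevenly across GPUs;
    ``nnet/py_factory.py`` passes ``system_configs.chunk_sizes`` for exactly
    this purpose.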
29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /models/py_utils/kp.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import torch 3 | 4 | import numpy as np 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | from .utils import convolution, residual 9 | from .utils import make_layer, make_layer_revr 10 | 11 | from .kp_utils import _tranpose_and_gather_feat, _decode 12 | from .kp_utils import _sigmoid, _ae_loss, _regr_loss, _neg_loss 13 | from .kp_utils import make_tl_layer, make_br_layer, make_kp_layer, make_ct_layer 14 | from .kp_utils import make_pool_layer, make_unpool_layer 15 | from .kp_utils import make_merge_layer, make_inter_layer, make_cnv_layer 16 | 17 | 18 | class kp_module(nn.Module): 19 | def __init__( 20 | self, n, dims, modules, layer=residual, 21 | make_up_layer=make_layer, make_low_layer=make_layer, 22 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 23 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 24 | make_merge_layer=make_merge_layer, **kwargs 25 | ): 26 | super(kp_module, self).__init__() 27 | 28 | self.n = n 29 | 30 | curr_mod = modules[0] 31 | next_mod = modules[1] 32 | 33 | curr_dim = dims[0] 34 | next_dim = dims[1] 35 | 36 | self.up1 = make_up_layer( 37 | 3, curr_dim, curr_dim, curr_mod, 38 | layer=layer, **kwargs 39 | ) 40 | self.max1 = make_pool_layer(curr_dim) 41 | self.low1 = make_hg_layer( 42 | 3, curr_dim, next_dim, curr_mod, 43 | layer=layer, **kwargs 44 | ) 45 | self.low2 = kp_module( 46 | n - 1, dims[1:], modules[1:], layer=layer, 47 | make_up_layer=make_up_layer, 48 | make_low_layer=make_low_layer, 49 | make_hg_layer=make_hg_layer, 50 | make_hg_layer_revr=make_hg_layer_revr, 51 | make_pool_layer=make_pool_layer, 52 | make_unpool_layer=make_unpool_layer, 53 | make_merge_layer=make_merge_layer, 54 | **kwargs 55 | ) if self.n > 1 else \ 56 | make_low_layer( 57 | 3, next_dim, next_dim, next_mod, 58 | layer=layer, **kwargs 59 | ) 60 | self.low3 = make_hg_layer_revr( 61 | 3, next_dim, curr_dim, curr_mod, 62 | layer=layer, **kwargs 63 | ) 64 | self.up2 = make_unpool_layer(curr_dim) 65 | 66 | self.merge = make_merge_layer(curr_dim) 67 | 68 | def forward(self, x): 69 | up1 = self.up1(x) 70 | max1 = self.max1(x) 71 | low1 = self.low1(max1) 72 | low2 = self.low2(low1) 73 | low3 = self.low3(low2) 74 | up2 = self.up2(low3) 75 | return self.merge(up1, up2) 76 | 77 | 78 | class kp(nn.Module): 79 | def __init__( 80 | self, db, n, nstack, dims, modules, out_dim, pre=None, cnv_dim=256, 81 | make_tl_layer=make_tl_layer, make_br_layer=make_br_layer, make_ct_layer=make_ct_layer, 82 | make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer, 83 | 
make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer, 84 | make_up_layer=make_layer, make_low_layer=make_layer, 85 | make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr, 86 | make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer, 87 | make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer, 88 | kp_layer=residual 89 | ): 90 | super(kp, self).__init__() 91 | 92 | self.nstack = nstack 93 | self._decode = _decode 94 | self._db = db 95 | self.K = self._db.configs["top_k"] 96 | self.ae_threshold = self._db.configs["ae_threshold"] 97 | self.kernel = self._db.configs["nms_kernel"] 98 | self.input_size = self._db.configs["input_size"][0] 99 | self.output_size = self._db.configs["output_sizes"][0][0] 100 | 101 | curr_dim = dims[0] 102 | 103 | self.pre = nn.Sequential( 104 | convolution(7, 3, 128, stride=2), 105 | residual(3, 128, 256, stride=2) 106 | ) if pre is None else pre 107 | 108 | self.kps = nn.ModuleList([ 109 | kp_module( 110 | n, dims, modules, layer=kp_layer, 111 | make_up_layer=make_up_layer, 112 | make_low_layer=make_low_layer, 113 | make_hg_layer=make_hg_layer, 114 | make_hg_layer_revr=make_hg_layer_revr, 115 | make_pool_layer=make_pool_layer, 116 | make_unpool_layer=make_unpool_layer, 117 | make_merge_layer=make_merge_layer 118 | ) for _ in range(nstack) 119 | ]) 120 | self.cnvs = nn.ModuleList([ 121 | make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack) 122 | ]) 123 | 124 | self.tl_cnvs = nn.ModuleList([ 125 | make_tl_layer(cnv_dim) for _ in range(nstack) 126 | ]) 127 | self.br_cnvs = nn.ModuleList([ 128 | make_br_layer(cnv_dim) for _ in range(nstack) 129 | ]) 130 | 131 | self.ct_cnvs = nn.ModuleList([ 132 | make_ct_layer(cnv_dim) for _ in range(nstack) 133 | ]) 134 | 135 | ## keypoint heatmaps 136 | self.tl_heats = nn.ModuleList([ 137 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 138 | ]) 139 | self.br_heats = nn.ModuleList([ 140 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 141 | ]) 142 | 143 | self.ct_heats = nn.ModuleList([ 144 | make_heat_layer(cnv_dim, curr_dim, out_dim) for _ in range(nstack) 145 | ]) 146 | 147 | ## tags 148 | self.tl_tags = nn.ModuleList([ 149 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack) 150 | ]) 151 | self.br_tags = nn.ModuleList([ 152 | make_tag_layer(cnv_dim, curr_dim, 1) for _ in range(nstack) 153 | ]) 154 | 155 | for tl_heat, br_heat, ct_heat in zip(self.tl_heats, self.br_heats, self.ct_heats): 156 | tl_heat[-1].bias.data.fill_(-2.19) 157 | br_heat[-1].bias.data.fill_(-2.19) 158 | ct_heat[-1].bias.data.fill_(-2.19) 159 | 160 | self.inters = nn.ModuleList([ 161 | make_inter_layer(curr_dim) for _ in range(nstack - 1) 162 | ]) 163 | 164 | self.inters_ = nn.ModuleList([ 165 | nn.Sequential( 166 | nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False), 167 | nn.BatchNorm2d(curr_dim) 168 | ) for _ in range(nstack - 1) 169 | ]) 170 | self.cnvs_ = nn.ModuleList([ 171 | nn.Sequential( 172 | nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False), 173 | nn.BatchNorm2d(curr_dim) 174 | ) for _ in range(nstack - 1) 175 | ]) 176 | 177 | self.tl_regrs = nn.ModuleList([ 178 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 179 | ]) 180 | self.br_regrs = nn.ModuleList([ 181 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 182 | ]) 183 | self.ct_regrs = nn.ModuleList([ 184 | make_regr_layer(cnv_dim, curr_dim, 2) for _ in range(nstack) 185 | ]) 186 | 187 | self.relu = nn.ReLU(inplace=True) 188 | 189 | def _train(self, *xs): 190 | image = xs[0] 191 
|         tl_inds = xs[1]
192 |         br_inds = xs[2]
193 |         ct_inds = xs[3]
194 | 
195 |         inter = self.pre(image)
196 |         outs = []
197 | 
198 |         layers = zip(
199 |             self.kps, self.cnvs,
200 |             self.tl_cnvs, self.br_cnvs,
201 |             self.ct_cnvs, self.tl_heats,
202 |             self.br_heats, self.ct_heats,
203 |             self.tl_tags, self.br_tags,
204 |             self.tl_regrs, self.br_regrs,
205 |             self.ct_regrs
206 |         )
207 |         for ind, layer in enumerate(layers):
208 |             kp_, cnv_ = layer[0:2]
209 |             tl_cnv_, br_cnv_ = layer[2:4]
210 |             ct_cnv_, tl_heat_ = layer[4:6]
211 |             br_heat_, ct_heat_ = layer[6:8]
212 |             tl_tag_, br_tag_ = layer[8:10]
213 |             tl_regr_, br_regr_ = layer[10:12]
214 |             ct_regr_ = layer[12]
215 | 
216 |             kp = kp_(inter)
217 |             cnv = cnv_(kp)
218 | 
219 |             tl_cnv = tl_cnv_(cnv)
220 |             br_cnv = br_cnv_(cnv)
221 |             ct_cnv = ct_cnv_(cnv)
222 | 
223 |             tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
224 |             tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
225 |             tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
226 | 
227 |             tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds)
228 |             br_tag = _tranpose_and_gather_feat(br_tag, br_inds)
229 |             tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds)
230 |             br_regr = _tranpose_and_gather_feat(br_regr, br_inds)
231 |             ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds)
232 |             # tl_tag and br_tag are per-corner embedding scalars, gathered at the ground-truth corner indices, used to group matching top-left and bottom-right corners
233 | 
234 |             outs += [tl_heat, br_heat, ct_heat, tl_tag, br_tag, tl_regr, br_regr, ct_regr]
235 | 
236 |             if ind < self.nstack - 1:
237 |                 inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
238 |                 inter = self.relu(inter)
239 |                 inter = self.inters[ind](inter)
240 | 
241 |         return outs
242 | 
243 |     def _test(self, *xs, **kwargs):
244 |         image = xs[0]
245 | 
246 |         inter = self.pre(image)
247 | 
248 |         outs = []
249 | 
250 |         layers = zip(
251 |             self.kps, self.cnvs,
252 |             self.tl_cnvs, self.br_cnvs,
253 |             self.ct_cnvs, self.tl_heats,
254 |             self.br_heats, self.ct_heats,
255 |             self.tl_tags, self.br_tags,
256 |             self.tl_regrs, self.br_regrs,
257 |             self.ct_regrs
258 |         )
259 |         for ind, layer in enumerate(layers):
260 |             kp_, cnv_ = layer[0:2]
261 |             tl_cnv_, br_cnv_ = layer[2:4]
262 |             ct_cnv_, tl_heat_ = layer[4:6]
263 |             br_heat_, ct_heat_ = layer[6:8]
264 |             tl_tag_, br_tag_ = layer[8:10]
265 |             tl_regr_, br_regr_ = layer[10:12]
266 |             ct_regr_ = layer[12]
267 | 
268 |             kp = kp_(inter)
269 |             cnv = cnv_(kp)
270 | 
271 |             if ind == self.nstack - 1:  # only the last hourglass stack feeds the detection heads at test time
272 |                 tl_cnv = tl_cnv_(cnv)
273 |                 br_cnv = br_cnv_(cnv)
274 |                 ct_cnv = ct_cnv_(cnv)
275 | 
276 |                 tl_heat, br_heat, ct_heat = tl_heat_(tl_cnv), br_heat_(br_cnv), ct_heat_(ct_cnv)
277 |                 tl_tag, br_tag = tl_tag_(tl_cnv), br_tag_(br_cnv)
278 |                 tl_regr, br_regr, ct_regr = tl_regr_(tl_cnv), br_regr_(br_cnv), ct_regr_(ct_cnv)
279 | 
280 |                 outs += [tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr,
281 |                          ct_heat, ct_regr]
282 | 
283 |             if ind < self.nstack - 1:
284 |                 inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
285 |                 inter = self.relu(inter)
286 |                 inter = self.inters[ind](inter)
287 | 
288 |         return self._decode(*outs[-8:], **kwargs)
289 | 
290 |     def forward(self, *xs, **kwargs):
291 |         if len(xs) > 1:
292 |             return self._train(*xs, **kwargs)
293 |         return self._test(*xs, **kwargs)
294 | 
295 | 
296 | class AELoss(nn.Module):
297 |     def __init__(self, pull_weight=1, push_weight=1, regr_weight=1, focal_loss=_neg_loss):
298 |         super(AELoss, self).__init__()
299 | 
300 |         self.pull_weight = pull_weight
301 |         self.push_weight = push_weight
302 |         self.regr_weight = regr_weight
303 |         self.focal_loss
= focal_loss 304 | self.ae_loss = _ae_loss 305 | self.regr_loss = _regr_loss 306 | 307 | def forward(self, outs, targets): 308 | stride = 8 309 | 310 | tl_heats = outs[0::stride] 311 | br_heats = outs[1::stride] 312 | ct_heats = outs[2::stride] 313 | tl_tags = outs[3::stride] 314 | br_tags = outs[4::stride] 315 | tl_regrs = outs[5::stride] 316 | br_regrs = outs[6::stride] 317 | ct_regrs = outs[7::stride] 318 | 319 | gt_tl_heat = targets[0] 320 | gt_br_heat = targets[1] 321 | gt_ct_heat = targets[2] 322 | gt_mask = targets[3] 323 | gt_tl_regr = targets[4] 324 | gt_br_regr = targets[5] 325 | gt_ct_regr = targets[6] 326 | 327 | # focal loss 328 | focal_loss = 0 329 | 330 | tl_heats = [_sigmoid(t) for t in tl_heats] 331 | br_heats = [_sigmoid(b) for b in br_heats] 332 | ct_heats = [_sigmoid(c) for c in ct_heats] 333 | 334 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 335 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 336 | focal_loss += self.focal_loss(ct_heats, gt_ct_heat) 337 | 338 | # tag loss 339 | pull_loss = 0 340 | push_loss = 0 341 | 342 | for tl_tag, br_tag in zip(tl_tags, br_tags): 343 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 344 | pull_loss += pull 345 | push_loss += push 346 | pull_loss = self.pull_weight * pull_loss 347 | push_loss = self.push_weight * push_loss 348 | 349 | regr_loss = 0 350 | for tl_regr, br_regr, ct_regr in zip(tl_regrs, br_regrs, ct_regrs): 351 | regr_loss += self.regr_loss(tl_regr, gt_tl_regr, gt_mask) 352 | regr_loss += self.regr_loss(br_regr, gt_br_regr, gt_mask) 353 | regr_loss += self.regr_loss(ct_regr, gt_ct_regr, gt_mask) 354 | regr_loss = self.regr_weight * regr_loss 355 | 356 | loss = (focal_loss + pull_loss + push_loss + regr_loss) / len(tl_heats) 357 | return loss.unsqueeze(0), (focal_loss / len(tl_heats)).unsqueeze(0), (pull_loss / len(tl_heats)).unsqueeze(0), ( 358 | push_loss / len(tl_heats)).unsqueeze(0), (regr_loss / len(tl_heats)).unsqueeze(0) 359 | -------------------------------------------------------------------------------- /models/py_utils/kp_utils.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import torch 3 | import torch.nn as nn 4 | 5 | from .utils import convolution, residual 6 | 7 | 8 | class MergeUp(nn.Module): 9 | def forward(self, up1, up2): 10 | return up1 + up2 11 | 12 | 13 | def make_merge_layer(dim): 14 | return MergeUp() 15 | 16 | 17 | def make_tl_layer(dim): 18 | return None 19 | 20 | 21 | def make_br_layer(dim): 22 | return None 23 | 24 | 25 | def make_ct_layer(dim): 26 | return None 27 | 28 | 29 | def make_pool_layer(dim): 30 | return nn.MaxPool2d(kernel_size=2, stride=2) 31 | 32 | 33 | def make_unpool_layer(dim): 34 | return nn.Upsample(scale_factor=2) 35 | 36 | 37 | def make_kp_layer(cnv_dim, curr_dim, out_dim): 38 | return nn.Sequential( 39 | convolution(3, cnv_dim, curr_dim, with_bn=False), 40 | nn.Conv2d(curr_dim, out_dim, (1, 1)) 41 | ) 42 | 43 | 44 | def make_inter_layer(dim): 45 | return residual(3, dim, dim) 46 | 47 | 48 | def make_cnv_layer(inp_dim, out_dim): 49 | return convolution(3, inp_dim, out_dim) 50 | 51 | 52 | def _gather_feat(feat, ind, mask=None): 53 | dim = feat.size(2) 54 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 55 | feat = feat.gather(1, ind) 56 | if mask is not None: 57 | mask = mask.unsqueeze(2).expand_as(feat) 58 | feat = feat[mask] 59 | feat = feat.view(-1, dim) 60 | return feat 61 | 62 | 63 | def _nms(heat, kernel=1): 64 | pad = (kernel - 1) // 2 65 | 66 | hmax = 
nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 67 | keep = (hmax == heat).float() 68 | return heat * keep 69 | 70 | 71 | def _tranpose_and_gather_feat(feat, ind): 72 | feat = feat.permute(0, 2, 3, 1).contiguous() 73 | feat = feat.view(feat.size(0), -1, feat.size(3)) 74 | feat = _gather_feat(feat, ind) 75 | return feat 76 | 77 | 78 | def _topk(scores, K=20): 79 | batch, cat, height, width = scores.size() 80 | 81 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 82 | 83 | topk_clses = (topk_inds / (height * width)).int() 84 | 85 | topk_inds = topk_inds % (height * width) 86 | topk_ys = (topk_inds / width).int().float() 87 | topk_xs = (topk_inds % width).int().float() 88 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 89 | 90 | 91 | def _decode( 92 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, ct_heat, ct_regr, 93 | K=100, kernel=1, ae_threshold=1, num_dets=1000 94 | ): 95 | batch, cat, height, width = tl_heat.size() 96 | 97 | tl_heat = torch.sigmoid(tl_heat) 98 | br_heat = torch.sigmoid(br_heat) 99 | ct_heat = torch.sigmoid(ct_heat) 100 | 101 | # perform nms on heatmaps 102 | tl_heat = _nms(tl_heat, kernel=kernel) 103 | br_heat = _nms(br_heat, kernel=kernel) 104 | ct_heat = _nms(ct_heat, kernel=kernel) 105 | 106 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 107 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 108 | ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = _topk(ct_heat, K=K) 109 | 110 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 111 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 112 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 113 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 114 | ct_ys = ct_ys.view(batch, 1, K).expand(batch, K, K) 115 | ct_xs = ct_xs.view(batch, 1, K).expand(batch, K, K) 116 | 117 | if tl_regr is not None and br_regr is not None: 118 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 119 | tl_regr = tl_regr.view(batch, K, 1, 2) 120 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 121 | br_regr = br_regr.view(batch, 1, K, 2) 122 | ct_regr = _tranpose_and_gather_feat(ct_regr, ct_inds) 123 | ct_regr = ct_regr.view(batch, 1, K, 2) 124 | 125 | tl_xs = tl_xs + tl_regr[..., 0] 126 | tl_ys = tl_ys + tl_regr[..., 1] 127 | br_xs = br_xs + br_regr[..., 0] 128 | br_ys = br_ys + br_regr[..., 1] 129 | ct_xs = ct_xs + ct_regr[..., 0] 130 | ct_ys = ct_ys + ct_regr[..., 1] 131 | 132 | # all possible boxes based on top k corners (ignoring class) 133 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 134 | 135 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 136 | tl_tag = tl_tag.view(batch, K, 1) 137 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 138 | br_tag = br_tag.view(batch, 1, K) 139 | dists = torch.abs(tl_tag - br_tag) 140 | 141 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 142 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 143 | scores = (tl_scores + br_scores) / 2 144 | 145 | # reject boxes based on classes 146 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 147 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 148 | cls_inds = (tl_clses != br_clses) 149 | 150 | # reject boxes based on distances 151 | dist_inds = (dists > ae_threshold) 152 | 153 | # reject boxes based on widths and heights 154 | width_inds = (br_xs < tl_xs) 155 | height_inds = (br_ys < tl_ys) 156 | 157 | scores[cls_inds] = -1 158 | scores[dist_inds] = -1 159 | scores[width_inds] = -1 160 | 
scores[height_inds] = -1 161 | 162 | scores = scores.view(batch, -1) 163 | scores, inds = torch.topk(scores, num_dets) 164 | scores = scores.unsqueeze(2) 165 | 166 | bboxes = bboxes.view(batch, -1, 4) 167 | bboxes = _gather_feat(bboxes, inds) 168 | 169 | # width = (bboxes[:,:,2] - bboxes[:,:,0]).unsqueeze(2) 170 | # height = (bboxes[:,:,2] - bboxes[:,:,0]).unsqueeze(2) 171 | 172 | clses = tl_clses.contiguous().view(batch, -1, 1) 173 | clses = _gather_feat(clses, inds).float() 174 | 175 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 176 | tl_scores = _gather_feat(tl_scores, inds).float() 177 | br_scores = br_scores.contiguous().view(batch, -1, 1) 178 | br_scores = _gather_feat(br_scores, inds).float() 179 | 180 | ct_xs = ct_xs[:, 0, :] 181 | ct_ys = ct_ys[:, 0, :] 182 | 183 | center = torch.cat([ct_xs.unsqueeze(2), ct_ys.unsqueeze(2), ct_clses.float().unsqueeze(2), ct_scores.unsqueeze(2)], 184 | dim=2) 185 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 186 | return detections, center 187 | 188 | 189 | def _neg_loss(preds, gt): 190 | pos_inds = gt.eq(1) 191 | neg_inds = gt.lt(1) 192 | 193 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 194 | 195 | loss = 0 196 | for pred in preds: 197 | pos_pred = pred[pos_inds] 198 | neg_pred = pred[neg_inds] 199 | 200 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 201 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 202 | 203 | num_pos = pos_inds.float().sum() 204 | pos_loss = pos_loss.sum() 205 | neg_loss = neg_loss.sum() 206 | 207 | if pos_pred.nelement() == 0: 208 | loss = loss - neg_loss 209 | else: 210 | loss = loss - (pos_loss + neg_loss) / num_pos 211 | return loss 212 | 213 | 214 | def _sigmoid(x): 215 | x = torch.clamp(x.sigmoid_(), min=1e-4, max=1 - 1e-4) 216 | return x 217 | 218 | 219 | def _ae_loss(tag0, tag1, mask): 220 | num = mask.sum(dim=1, keepdim=True).float() 221 | tag0 = tag0.squeeze() 222 | tag1 = tag1.squeeze() 223 | 224 | tag_mean = (tag0 + tag1) / 2 225 | 226 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 227 | tag0 = tag0[mask].sum() 228 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 229 | tag1 = tag1[mask].sum() 230 | pull = tag0 + tag1 231 | 232 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 233 | mask = mask.eq(2) 234 | num = num.unsqueeze(2) 235 | num2 = (num - 1) * num 236 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 237 | dist = 1 - torch.abs(dist) 238 | dist = nn.functional.relu(dist, inplace=True) 239 | dist = dist - 1 / (num + 1e-4) 240 | dist = dist / (num2 + 1e-4) 241 | dist = dist[mask] 242 | push = dist.sum() 243 | return pull, push 244 | 245 | 246 | def _regr_loss(regr, gt_regr, mask): 247 | num = mask.float().sum() 248 | mask = mask.unsqueeze(2).expand_as(gt_regr) 249 | 250 | regr = regr[mask] 251 | gt_regr = gt_regr[mask] 252 | 253 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False) 254 | regr_loss = regr_loss / (num + 1e-4) 255 | return regr_loss 256 | -------------------------------------------------------------------------------- /models/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. 
Duplicates
10 |     references to objects that are not variables. Does not
11 |     support Tensors.
12 |     """
13 |     def scatter_map(obj):
14 |         if isinstance(obj, Variable):
15 |             return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
16 |         assert not torch.is_tensor(obj), "Tensors not supported in scatter."
17 |         if isinstance(obj, tuple):
18 |             return list(zip(*map(scatter_map, obj)))
19 |         if isinstance(obj, list):
20 |             return list(map(list, zip(*map(scatter_map, obj))))
21 |         if isinstance(obj, dict):
22 |             return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
23 |         return [obj for targets in target_gpus]
24 | 
25 |     return scatter_map(inputs)
26 | 
27 | 
28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
29 |     r"""Scatter with support for kwargs dictionary"""
30 |     inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
31 |     kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
32 |     if len(inputs) < len(kwargs):
33 |         inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
34 |     elif len(kwargs) < len(inputs):
35 |         kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
36 |     inputs = tuple(inputs)
37 |     kwargs = tuple(kwargs)
38 |     return inputs, kwargs
39 | 
-------------------------------------------------------------------------------- /models/py_utils/utils.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | # a convolution module containing a convolution layer, an optional BN layer and a ReLU activation;
5 | # with stride 1, the spatial sizes of the input and output are the same
6 | class convolution(nn.Module):
7 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
8 |         super(convolution, self).__init__()
9 | 
10 |         pad = (k - 1) // 2
11 |         self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
12 |         self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
13 |         self.relu = nn.ReLU(inplace=True)
14 | 
15 |     def forward(self, x):
16 |         conv = self.conv(x)
17 |         bn = self.bn(conv)
18 |         relu = self.relu(bn)
19 |         return relu
20 | 
21 | # a fully-connected module containing a linear layer, an optional BN layer and a ReLU activation
22 | class fully_connected(nn.Module):
23 |     def __init__(self, inp_dim, out_dim, with_bn=True):
24 |         super(fully_connected, self).__init__()
25 |         self.with_bn = with_bn
26 | 
27 |         self.linear = nn.Linear(inp_dim, out_dim)
28 |         if self.with_bn:
29 |             self.bn = nn.BatchNorm1d(out_dim)
30 |         self.relu = nn.ReLU(inplace=True)
31 | 
32 |     def forward(self, x):
33 |         linear = self.linear(x)
34 |         bn = self.bn(linear) if self.with_bn else linear
35 |         relu = self.relu(bn)
36 |         return relu
37 | 
38 | # a residual module with two branches: the main branch contains two convolution layers, while
39 | # the skip branch contains a 1x1 convolution whenever the main branch's stride is not 1 or the
40 | # input channel count is not equal to the output channel count
41 | class residual(nn.Module):
42 |     def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
43 |         super(residual, self).__init__()
44 | 
45 |         self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
46 |         self.bn1 = nn.BatchNorm2d(out_dim)
47 |         self.relu1 = nn.ReLU(inplace=True)
48 | 
49 |         self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
50 |         self.bn2 = nn.BatchNorm2d(out_dim)
51 | 
52 |         self.skip = nn.Sequential(
53 |             nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
54 |             nn.BatchNorm2d(out_dim)
55 |         ) if stride != 1 or inp_dim != out_dim else nn.Sequential()
56 |         self.relu = nn.ReLU(inplace=True)
57 | 
58 |     def forward(self, x):
59 |         conv1 = self.conv1(x)
60 |         bn1 = self.bn1(conv1)
61 |         relu1 = self.relu1(bn1)
62 | 
63 |         conv2 = self.conv2(relu1)
64 |         bn2 = self.bn2(conv2)
65 | 
66 |         skip = self.skip(x)
67 |         return self.relu(bn2 + skip)
68 | 
69 | # stacks `modules` layers of the given type (the default is the convolution module above);
70 | # only the first layer may change the channel count, from inp_dim to out_dim
71 | def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
72 |     layers = [layer(k, inp_dim, out_dim, **kwargs)]
73 |     for _ in range(1, modules):
74 |         layers.append(layer(k, out_dim, out_dim, **kwargs))
75 |     return nn.Sequential(*layers)
76 | 
77 | # the reverse of `make_layer`: only the last layer may change the channel count
78 | def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
79 |     layers = []
80 |     for _ in range(modules - 1):
81 |         layers.append(layer(k, inp_dim, inp_dim, **kwargs))
82 |     layers.append(layer(k, inp_dim, out_dim, **kwargs))
83 |     return nn.Sequential(*layers)
84 | 
-------------------------------------------------------------------------------- /nnet/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/nnet/__init__.py
-------------------------------------------------------------------------------- /nnet/py_factory.py: --------------------------------------------------------------------------------
1 | import os
2 | import pdb
3 | import torch
4 | import importlib
5 | import torch.nn as nn
6 | 
7 | from config import system_configs
8 | from models.py_utils.data_parallel import DataParallel
9 | 
10 | torch.manual_seed(317)
11 | 
12 | class Network(nn.Module):
13 |     def __init__(self, model, loss):
14 |         super(Network, self).__init__()
15 | 
16 |         self.model = model
17 |         self.loss = loss
18 | 
19 |     def forward(self, xs, ys, **kwargs):
20 |         preds = self.model(*xs, **kwargs)
21 |         loss_kp = self.loss(preds, ys, **kwargs)
22 |         return loss_kp
23 | 
24 | # for model backward compatibility
25 | # previously the model was wrapped by a DataParallel module
26 | class DummyModule(nn.Module):
27 |     def __init__(self, model):
28 |         super(DummyModule, self).__init__()
29 |         self.module = model
30 | 
31 |     def forward(self, *xs, **kwargs):
32 |         return self.module(*xs, **kwargs)
33 | 
34 | 
35 | class NetworkFactory(object):
36 |     def __init__(self, db):
37 |         super(NetworkFactory, self).__init__()
38 | 
39 |         module_file = "models.{}".format(system_configs.snapshot_name)
40 |         print("module_file: {}".format(module_file))
41 |         nnet_module = importlib.import_module(module_file)
42 | 
43 |         self.model = DummyModule(nnet_module.model(db))
44 |         self.loss = nnet_module.loss
45 |         self.network = Network(self.model, self.loss)
46 |         self.network = DataParallel(self.network, chunk_sizes=system_configs.chunk_sizes).cuda()
47 |         self.load_cropped_pretrained_model("cache/nnet/CenterNet-52/CenterNet-52_480000.pkl")
48 | 
49 |         total_params = 0
50 |         for params in self.model.parameters():
51 |             num_params = 1
52 |             for x in params.size():
53 |                 num_params *= x
54 |             total_params += num_params
55 |         print("total parameters: {}".format(total_params))
56 | 
57 |         # self.fix_layers() # fix kps and prelayer
58 | 
59 |         if system_configs.opt_algo == "adam":
60 |             self.optimizer = torch.optim.Adam(
61 |                 filter(lambda p: p.requires_grad, self.model.parameters())
62 | 
) 63 | elif system_configs.opt_algo == "sgd": 64 | self.optimizer = torch.optim.SGD( 65 | filter(lambda p: p.requires_grad, self.model.parameters()), 66 | lr=system_configs.learning_rate, 67 | momentum=0.9, weight_decay=0.0001 68 | ) 69 | else: 70 | raise ValueError("unknown optimizer") 71 | 72 | def cuda(self): 73 | self.model.cuda() 74 | 75 | def load_cropped_pretrained_model(self, params_file): 76 | x = torch.load(params_file) 77 | params = {'module.model.%s'%k: v for k, v in x.items() if 'heats' not in k} 78 | self.network.load_state_dict(params, strict=False) 79 | print("load the cropped weights from COCO successfully.") 80 | 81 | def fix_layers(self): 82 | for m, v in self.network.named_parameters(): 83 | if '.pre' in m or '.kps' in m: 84 | v.requires_grad = False 85 | 86 | def train_mode(self): 87 | self.network.train() 88 | 89 | def eval_mode(self): 90 | self.network.eval() 91 | 92 | def train(self, xs, ys, **kwargs): 93 | xs = [x for x in xs] 94 | ys = [y for y in ys] 95 | 96 | self.optimizer.zero_grad() 97 | loss_kp = self.network(xs, ys) 98 | loss = loss_kp[0] 99 | focal_loss = loss_kp[1] 100 | pull_loss = loss_kp[2] 101 | push_loss = loss_kp[3] 102 | regr_loss = loss_kp[4] 103 | loss = loss.mean() 104 | focal_loss = focal_loss.mean() 105 | pull_loss = pull_loss.mean() 106 | push_loss = push_loss.mean() 107 | regr_loss = regr_loss.mean() 108 | loss.backward() 109 | self.optimizer.step() 110 | return loss, focal_loss, pull_loss, push_loss, regr_loss 111 | 112 | def validate(self, xs, ys, **kwargs): 113 | with torch.no_grad(): 114 | xs = [x.cuda(non_blocking=True) for x in xs] 115 | ys = [y.cuda(non_blocking=True) for y in ys] 116 | 117 | loss_kp = self.network(xs, ys) 118 | loss = loss_kp[0] 119 | focal_loss = loss_kp[1] 120 | pull_loss = loss_kp[2] 121 | push_loss = loss_kp[3] 122 | regr_loss = loss_kp[4] 123 | loss = loss.mean() 124 | return loss 125 | 126 | def test(self, xs, **kwargs): 127 | with torch.no_grad(): 128 | xs = [x.cuda(non_blocking=True) for x in xs] 129 | return self.model(*xs, **kwargs) 130 | 131 | def set_lr(self, lr): 132 | print("setting learning rate to: {}".format(lr)) 133 | for param_group in self.optimizer.param_groups: 134 | param_group["lr"] = lr 135 | 136 | def load_pretrained_params(self, pretrained_model): 137 | print("loading from {}".format(pretrained_model)) 138 | with open(pretrained_model, "rb") as f: 139 | params = torch.load(f) 140 | self.model.load_state_dict(params) 141 | 142 | def load_params(self, iteration): 143 | cache_file = system_configs.snapshot_file.format(iteration) 144 | print("loading model from {}".format(cache_file)) 145 | with open(cache_file, "rb") as f: 146 | params = torch.load(f) 147 | self.model.load_state_dict(params) 148 | 149 | def save_params(self, iteration): 150 | cache_file = system_configs.snapshot_file.format(iteration) 151 | print("saving model to {}".format(cache_file)) 152 | with open(cache_file, "wb") as f: 153 | params = self.model.state_dict() 154 | torch.save(params, f) 155 | -------------------------------------------------------------------------------- /sample/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/sample/__init__.py -------------------------------------------------------------------------------- /sample/pedestrian.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import math 3 | import numpy 
as np 4 | import torch 5 | import random 6 | import string 7 | 8 | from config import system_configs 9 | from utils import crop_image, normalize_, color_jittering_, lighting_ 10 | from .utils import random_crop, draw_gaussian, gaussian_radius 11 | 12 | 13 | def _full_image_crop(image, detections): 14 | detections = detections.copy() 15 | height, width = image.shape[0:2] 16 | 17 | max_hw = max(height, width) 18 | center = [height // 2, width // 2] 19 | size = [max_hw, max_hw] 20 | 21 | image, border, offset = crop_image(image, center, size) 22 | detections[:, 0:4:2] += border[2] 23 | detections[:, 1:4:2] += border[0] 24 | return image, detections 25 | 26 | 27 | def _resize_image(image, detections, size): 28 | detections = detections.copy() 29 | height, width = image.shape[0:2] 30 | new_height, new_width = size 31 | 32 | image = cv2.resize(image, (new_width, new_height)) 33 | 34 | height_ratio = new_height / height 35 | width_ratio = new_width / width 36 | detections[:, 0:4:2] *= width_ratio 37 | detections[:, 1:4:2] *= height_ratio 38 | return image, detections 39 | 40 | 41 | def _clip_detections(image, detections): 42 | detections = detections.copy() 43 | height, width = image.shape[0:2] 44 | 45 | detections[:, 0:4:2] = np.clip(detections[:, 0:4:2], 0, width - 1) 46 | detections[:, 1:4:2] = np.clip(detections[:, 1:4:2], 0, height - 1) 47 | keep_inds = ((detections[:, 2] - detections[:, 0]) > 0) & \ 48 | ((detections[:, 3] - detections[:, 1]) > 0) 49 | detections = detections[keep_inds] 50 | return detections 51 | 52 | 53 | def kp_detection(db, k_ind, data_aug, debug): 54 | data_rng = system_configs.data_rng 55 | batch_size = system_configs.batch_size 56 | 57 | categories = db.configs["categories"] 58 | input_size = db.configs["input_size"] 59 | output_size = db.configs["output_sizes"][0] 60 | 61 | border = db.configs["border"] 62 | lighting = db.configs["lighting"] 63 | rand_crop = db.configs["rand_crop"] 64 | rand_color = db.configs["rand_color"] 65 | rand_scales = db.configs["rand_scales"] 66 | gaussian_bump = db.configs["gaussian_bump"] 67 | gaussian_iou = db.configs["gaussian_iou"] 68 | gaussian_rad = db.configs["gaussian_radius"] 69 | 70 | max_tag_len = 128 71 | 72 | # allocating memory 73 | images = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) 74 | tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 75 | br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 76 | ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32) 77 | tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 78 | br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 79 | ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32) 80 | tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 81 | br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 82 | ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64) 83 | tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8) 84 | tag_lens = np.zeros((batch_size,), dtype=np.int32) 85 | 86 | db_size = db.db_inds.size 87 | for b_ind in range(batch_size): 88 | if not debug and k_ind == 0: 89 | db.shuffle_inds() 90 | 91 | db_ind = db.db_inds[k_ind] 92 | k_ind = (k_ind + 1) % db_size 93 | 94 | # reading image 95 | image_file = db.image_file(db_ind) 96 | image = cv2.imread(image_file) 97 | 98 | # reading detections 99 | detections = db.detections(db_ind) 100 
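        # NOTE (layout inferred from the slicing below): each row of
        # `detections` is [xtl, ytl, xbr, ybr, category] in input-image pixel
        # coordinates; the crop / resize / flip steps that follow transform
        # the boxes in lockstep with the image.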
101 |         # cropping an image randomly
102 |         if not debug and rand_crop:
103 |             image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
104 |         else:
105 |             image, detections = _full_image_crop(image, detections)
106 | 
107 |         image, detections = _resize_image(image, detections, input_size)
108 |         detections = _clip_detections(image, detections)
109 | 
110 |         width_ratio = output_size[1] / input_size[1]
111 |         height_ratio = output_size[0] / input_size[0]
112 | 
113 |         # flipping an image randomly
114 |         if not debug and np.random.uniform() > 0.5:
115 |             image[:] = image[:, ::-1, :]
116 |             width = image.shape[1]
117 |             detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
118 | 
119 |         if not debug:
120 |             image = image.astype(np.float32) / 255.
121 |             if rand_color:
122 |                 color_jittering_(data_rng, image)
123 |             if lighting:
124 |                 lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
125 |             normalize_(image, db.mean, db.std)
126 |         images[b_ind] = image.transpose((2, 0, 1))
127 | 
128 |         for ind, detection in enumerate(detections):
129 |             category = int(detection[-1]) - 1
130 |             # category = 0
131 | 
132 |             xtl, ytl = detection[0], detection[1]
133 |             xbr, ybr = detection[2], detection[3]
134 |             xct, yct = (detection[2] + detection[0]) / 2., (detection[3] + detection[1]) / 2.
135 | 
136 |             fxtl = (xtl * width_ratio)
137 |             fytl = (ytl * height_ratio)
138 |             fxbr = (xbr * width_ratio)
139 |             fybr = (ybr * height_ratio)
140 |             fxct = (xct * width_ratio)
141 |             fyct = (yct * height_ratio)
142 | 
143 |             xtl = int(fxtl)
144 |             ytl = int(fytl)
145 |             xbr = int(fxbr)
146 |             ybr = int(fybr)
147 |             xct = int(fxct)
148 |             yct = int(fyct)
149 | 
150 |             if gaussian_bump:
151 |                 width = detection[2] - detection[0]
152 |                 height = detection[3] - detection[1]
153 | 
154 |                 width = math.ceil(width * width_ratio)
155 |                 height = math.ceil(height * height_ratio)
156 | 
157 |                 if gaussian_rad == -1:
158 |                     radius = gaussian_radius((height, width), gaussian_iou)
159 |                     radius = max(0, int(radius))
160 |                 else:
161 |                     radius = gaussian_rad
162 | 
163 |                 draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
164 |                 draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
165 |                 draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte=5)
166 | 
167 |             else:
168 |                 tl_heatmaps[b_ind, category, ytl, xtl] = 1
169 |                 br_heatmaps[b_ind, category, ybr, xbr] = 1
170 |                 ct_heatmaps[b_ind, category, yct, xct] = 1
171 | 
172 |             tag_ind = tag_lens[b_ind]
173 |             tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
174 |             br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
175 |             ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
176 |             tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
177 |             br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
178 |             ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
179 |             tag_lens[b_ind] += 1
180 | 
181 |     for b_ind in range(batch_size):
182 |         tag_len = tag_lens[b_ind]
183 |         tag_masks[b_ind, :tag_len] = 1
184 | 
185 |     images = torch.from_numpy(images)
186 |     tl_heatmaps = torch.from_numpy(tl_heatmaps)
187 |     br_heatmaps = torch.from_numpy(br_heatmaps)
188 |     ct_heatmaps = torch.from_numpy(ct_heatmaps)
189 |     tl_regrs = torch.from_numpy(tl_regrs)
190 |     br_regrs = torch.from_numpy(br_regrs)
191 |     ct_regrs = torch.from_numpy(ct_regrs)
192 |     tl_tags = torch.from_numpy(tl_tags)  # B x N, flattened indices (y * output_width + x) into the H*W output map
193 |     br_tags = torch.from_numpy(br_tags)
194 |     ct_tags = torch.from_numpy(ct_tags)
195 |     tag_masks = torch.from_numpy(tag_masks)  # marks the valid (non-padded) entries, convenient for batched loss computation
196 | 
197 |     return {
198 | 
"xs": [images, tl_tags, br_tags, ct_tags], 199 | "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs] 200 | }, k_ind 201 | 202 | 203 | def sample_data(db, k_ind, data_aug=True, debug=False): 204 | return globals()[system_configs.sampling_function](db, k_ind, data_aug, debug) 205 | -------------------------------------------------------------------------------- /sample/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def gaussian2D(shape, sigma=1): 5 | m, n = [(ss - 1.) / 2. for ss in shape] 6 | y, x = np.ogrid[-m:m+1,-n:n+1] 7 | 8 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 9 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 10 | return h 11 | 12 | def draw_gaussian(heatmap, center, radius, k=1, delte=6): 13 | diameter = 2 * radius + 1 14 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / delte) 15 | 16 | x, y = center 17 | 18 | height, width = heatmap.shape[0:2] 19 | 20 | left, right = min(x, radius), min(width - x, radius + 1) 21 | top, bottom = min(y, radius), min(height - y, radius + 1) 22 | 23 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 24 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 25 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 26 | 27 | def gaussian_radius(det_size, min_overlap): 28 | height, width = det_size 29 | 30 | a1 = 1 31 | b1 = (height + width) 32 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 33 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 34 | r1 = (b1 + sq1) / 2 35 | 36 | a2 = 4 37 | b2 = 2 * (height + width) 38 | c2 = (1 - min_overlap) * width * height 39 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 40 | r2 = (b2 + sq2) / 2 41 | 42 | a3 = 4 * min_overlap 43 | b3 = -2 * min_overlap * (height + width) 44 | c3 = (min_overlap - 1) * width * height 45 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 46 | r3 = (b3 + sq3) / 2 47 | return min(r1, r2, r3) 48 | 49 | def _get_border(border, size): 50 | i = 1 51 | while size - border // i <= border // i: 52 | i *= 2 53 | return border // i 54 | 55 | def random_crop(image, detections, random_scales, view_size, border=64): 56 | view_height, view_width = view_size 57 | image_height, image_width = image.shape[0:2] 58 | 59 | scale = np.random.choice(random_scales) 60 | height = int(view_height * scale) 61 | width = int(view_width * scale) 62 | 63 | cropped_image = np.zeros((height, width, 3), dtype=image.dtype) 64 | 65 | w_border = _get_border(border, image_width) 66 | h_border = _get_border(border, image_height) 67 | 68 | ctx = np.random.randint(low=w_border, high=image_width - w_border) 69 | cty = np.random.randint(low=h_border, high=image_height - h_border) 70 | 71 | x0, x1 = max(ctx - width // 2, 0), min(ctx + width // 2, image_width) 72 | y0, y1 = max(cty - height // 2, 0), min(cty + height // 2, image_height) 73 | 74 | left_w, right_w = ctx - x0, x1 - ctx 75 | top_h, bottom_h = cty - y0, y1 - cty 76 | 77 | # crop image 78 | cropped_ctx, cropped_cty = width // 2, height // 2 79 | x_slice = slice(cropped_ctx - left_w, cropped_ctx + right_w) 80 | y_slice = slice(cropped_cty - top_h, cropped_cty + bottom_h) 81 | cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :] 82 | 83 | # crop detections 84 | cropped_detections = detections.copy() 85 | cropped_detections[:, 0:4:2] -= x0 86 | cropped_detections[:, 1:4:2] -= y0 87 | cropped_detections[:, 0:4:2] += cropped_ctx - left_w 88 | cropped_detections[:, 1:4:2] += 
cropped_cty - top_h 89 | 90 | return cropped_image, cropped_detections 91 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import torch 4 | import pprint 5 | import argparse 6 | import importlib 7 | import numpy as np 8 | 9 | import matplotlib 10 | matplotlib.use("Agg") 11 | 12 | from config import system_configs 13 | from nnet.py_factory import NetworkFactory 14 | from db.datasets import datasets 15 | 16 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 17 | torch.backends.cudnn.benchmark = False 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser(description="Test CenterNet") 21 | parser.add_argument('--cfg_file', default='CenterNet-52', help='config file', type=str) 22 | parser.add_argument('--testiter', dest='testiter', help='test at iteration i', 23 | default=10000, type=int) 24 | parser.add_argument('--split', dest='split', help='which split to use', 25 | default='validation', type=str) 26 | parser.add_argument('--suffix', dest="suffix", default=None, type=str) 27 | parser.add_argument('--debug', action='store_true') 28 | 29 | args = parser.parse_args() 30 | return args 31 | 32 | def make_dirs(directories): 33 | for directory in directories: 34 | if not os.path.exists(directory): 35 | os.makedirs(directory) 36 | 37 | def test(db, split, testiter, debug=False, suffix=None): 38 | result_dir = system_configs.result_dir 39 | result_dir = os.path.join(result_dir, str(testiter), split) 40 | 41 | if suffix is not None: 42 | result_dir = os.path.join(result_dir, suffix) 43 | 44 | make_dirs([result_dir]) 45 | 46 | test_iter = system_configs.max_iter if testiter is None else testiter 47 | print("loading parameters at iteration: {}".format(testiter)) 48 | 49 | print("building network ...") 50 | nnet = NetworkFactory(db) 51 | print("loading parameters ...") 52 | nnet.load_params(test_iter) 53 | 54 | test_file = 'test.{}'.format(db.data) 55 | testing = importlib.import_module(test_file).testing 56 | 57 | nnet.cuda() 58 | nnet.eval_mode() 59 | testing(db, nnet, result_dir, debug=debug) 60 | 61 | 62 | if __name__ == '__main__': 63 | args = parse_args() 64 | if args.suffix is None: 65 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '.json') 66 | else: 67 | cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + '-{}.json'.format(args.suffix)) 68 | print("cfg file: {}".format(cfg_file)) 69 | 70 | with open(cfg_file, "r") as f: 71 | configs = json.load(f) 72 | 73 | configs["system"]["snapshot_name"] = args.cfg_file 74 | system_configs.update_config(configs["system"]) 75 | 76 | train_split = system_configs.train_split 77 | val_split = system_configs.val_split 78 | 79 | split = { 80 | "training": train_split, 81 | "validation": val_split, 82 | }[args.split] 83 | 84 | print("loading all datasets ...") 85 | dataset = system_configs.dataset 86 | print("split: {}".format(split)) 87 | testing_db = datasets[dataset](configs["db"], split) 88 | 89 | print("system config...") 90 | pprint.pprint(system_configs.full) 91 | 92 | print("db config...") 93 | pprint.pprint(testing_db.configs) 94 | 95 | test(testing_db, args.split, args.testiter, args.debug, args.suffix) 96 | 97 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- /test/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZongweiZhou1/CenterNetPerson/2648eb5a3172e6d3b5c23f3d9ffb90b55570e19a/test/__init__.py
-------------------------------------------------------------------------------- /test/base.py: --------------------------------------------------------------------------------
1 | import json
2 | 
3 | from nnet.py_factory import NetworkFactory
4 | 
5 | class Base(object):
6 |     def __init__(self, db, nnet, func, model=None):
7 |         super(Base, self).__init__()
8 | 
9 |         self._db = db
10 |         self._nnet = nnet
11 |         self._func = func
12 | 
13 |         if model is not None:
14 |             self._nnet.load_pretrained_params(model)
15 | 
16 |         self._nnet.cuda()
17 |         self._nnet.eval_mode()
18 | 
19 |     def _inference(self, image, *args, **kwargs):
20 |         return self._func(self._db, self._nnet, image.copy(), *args, **kwargs)
21 | 
22 |     def __call__(self, image, *args, **kwargs):
23 |         categories = self._db.configs["categories"]
24 |         bboxes = self._inference(image, *args, **kwargs)
25 |         return {'pedestrian': bboxes[j] for j in range(1, categories + 1)}  # constant key: only correct because categories == 1 for pedestrian detection
26 | 
27 | def load_cfg(cfg_file):
28 |     with open(cfg_file, "r") as f:
29 |         cfg = json.load(f)
30 | 
31 |     cfg_sys = cfg["system"]
32 |     cfg_db = cfg["db"]
33 |     return cfg_sys, cfg_db
34 | 
35 | def load_nnet(cfg_sys):
36 |     return NetworkFactory(cfg_sys)
37 | 
-------------------------------------------------------------------------------- /test/centernet.py: --------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import pdb
4 | import json
5 | import copy
6 | import numpy as np
7 | import torch
8 | 
9 | from PIL import Image, ImageDraw, ImageFont
10 | import matplotlib.pyplot as plt
11 | import matplotlib
12 | 
13 | from tqdm import tqdm
14 | from config import system_configs
15 | from utils import crop_image, normalize_
16 | from external.nms import soft_nms, soft_nms_merge
17 | 
18 | colours = np.random.rand(80, 3)
19 | 
20 | 
21 | def _rescale_dets(detections, ratios, borders, sizes):
22 |     xs, ys = detections[..., 0:4:2], detections[..., 1:4:2]
23 |     xs /= ratios[:, 1][:, None, None]
24 |     ys /= ratios[:, 0][:, None, None]
25 |     xs -= borders[:, 2][:, None, None]
26 |     ys -= borders[:, 0][:, None, None]
27 |     tx_inds = xs[:, :, 0] <= -5
28 |     bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5
29 |     ty_inds = ys[:, :, 0] <= -5
30 |     by_inds = ys[:, :, 1] >= sizes[0, 0] + 5
31 | 
32 |     np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs)
33 |     np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys)
34 |     detections[:, tx_inds[0, :], 4] = -1
35 |     detections[:, bx_inds[0, :], 4] = -1
36 |     detections[:, ty_inds[0, :], 4] = -1
37 |     detections[:, by_inds[0, :], 4] = -1
38 | 
39 | 
40 | def save_image(data, fn):
41 |     sizes = np.shape(data)
42 |     height = float(sizes[0])
43 |     width = float(sizes[1])
44 | 
45 |     fig = plt.figure()
46 |     fig.set_size_inches(width / height, 1, forward=False)
47 |     ax = plt.Axes(fig, [0., 0., 1., 1.])
48 |     ax.set_axis_off()
49 |     fig.add_axes(ax)
50 | 
51 |     ax.imshow(data)
52 |     plt.savefig(fn, dpi=height)
53 |     plt.close()
54 | 
55 | 
56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3):
57 |     detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel)
58 |     detections = detections.data.cpu().numpy()
59 |     center = center.data.cpu().numpy()
60 |     return detections, center
61 | 
62 | 
63 | def inference(db, nnet, image, decode_func=kp_decode):
64 |     K = db.configs["top_k"]
65 |     ae_threshold = db.configs["ae_threshold"]
66 |     nms_kernel = db.configs["nms_kernel"]
67 | 
68 |     scales = db.configs["test_scales"]
69 
| weight_exp = db.configs["weight_exp"] 70 | merge_bbox = db.configs["merge_bbox"] 71 | categories = db.configs["categories"] 72 | nms_threshold = db.configs["nms_threshold"] 73 | max_per_image = db.configs["max_per_image"] 74 | nms_algorithm = { 75 | "nms": 0, 76 | "linear_soft_nms": 1, 77 | "exp_soft_nms": 2 78 | }[db.configs["nms_algorithm"]] 79 | 80 | height, width = image.shape[0:2] 81 | detections, center_points = [], [] 82 | 83 | for scale in scales: 84 | new_height = int(height * scale) 85 | new_width = int(width * scale) 86 | new_center = np.array([new_height // 2, new_width // 2]) 87 | 88 | inp_height = new_height | 127 89 | inp_width = new_width | 127 90 | 91 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 92 | ratios = np.zeros((1, 2), dtype=np.float32) 93 | borders = np.zeros((1, 4), dtype=np.float32) 94 | sizes = np.zeros((1, 2), dtype=np.float32) 95 | 96 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 97 | height_ratio = out_height / inp_height 98 | width_ratio = out_width / inp_width 99 | 100 | resized_image = cv2.resize(image, (new_width, new_height)) 101 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 102 | 103 | resized_image = resized_image / 255. 104 | normalize_(resized_image, db.mean, db.std) 105 | 106 | images[0] = resized_image.transpose((2, 0, 1)) 107 | borders[0] = border 108 | sizes[0] = [int(height * scale), int(width * scale)] 109 | ratios[0] = [height_ratio, width_ratio] 110 | 111 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 112 | images = torch.from_numpy(images) 113 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) 114 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses 115 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores 116 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip 117 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip 118 | dets = dets.reshape(1, -1, 8) 119 | center = center.reshape(1, -1, 4) 120 | 121 | _rescale_dets(dets, ratios, borders, sizes) 122 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image 123 | center[..., [1]] /= ratios[:, 0][:, None, None] 124 | center[..., [0]] -= borders[:, 2][:, None, None] 125 | center[..., [1]] -= borders[:, 0][:, None, None] 126 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) 127 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) 128 | dets[:, :, 0:4] /= scale 129 | center[:, :, 0:2] /= scale # remap to origin image 130 | 131 | if scale == 1: 132 | center_points.append(center) 133 | detections.append(dets) 134 | 135 | detections = np.concatenate(detections, axis=1) 136 | center_points = np.concatenate(center_points, axis=1) 137 | 138 | classes = detections[..., -1] 139 | classes = classes[0] 140 | detections = detections[0] 141 | center_points = center_points[0] 142 | 143 | valid_ind = detections[:, 4] > -1 144 | valid_detections = detections[valid_ind] 145 | 146 | box_width = valid_detections[:, 2] - valid_detections[:, 0] 147 | box_height = valid_detections[:, 3] - valid_detections[:, 1] 148 | 149 | s_ind = (box_width * box_height <= 22500) 150 | l_ind = (box_width * box_height > 22500) 151 | 152 | s_detections = valid_detections[s_ind] 153 | l_detections = valid_detections[l_ind] 154 | # trisection 155 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3 156 | 
s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3 157 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 158 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 159 | 160 | s_temp_score = copy.copy(s_detections[:, 4]) 161 | s_detections[:, 4] = -1 162 | 163 | center_x = center_points[:, 0][:, np.newaxis] 164 | center_y = center_points[:, 1][:, np.newaxis] 165 | s_left_x = s_left_x[np.newaxis, :] 166 | s_right_x = s_right_x[np.newaxis, :] 167 | s_top_y = s_top_y[np.newaxis, :] 168 | s_bottom_y = s_bottom_y[np.newaxis, :] 169 | # located in center region 170 | ind_lx = (center_x - s_left_x) > 0 171 | ind_rx = (center_x - s_right_x) < 0 172 | ind_ty = (center_y - s_top_y) > 0 173 | ind_by = (center_y - s_bottom_y) < 0 174 | # same classes 175 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 176 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 177 | index_s_new_score = np.argmax( 178 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], 179 | axis=0) # select the box having center located in the center region 180 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[index_s_new_score, 3]) / 3 181 | 182 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 183 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 184 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 185 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 186 | 187 | l_temp_score = copy.copy(l_detections[:, 4]) 188 | l_detections[:, 4] = -1 189 | 190 | center_x = center_points[:, 0][:, np.newaxis] 191 | center_y = center_points[:, 1][:, np.newaxis] 192 | l_left_x = l_left_x[np.newaxis, :] 193 | l_right_x = l_right_x[np.newaxis, :] 194 | l_top_y = l_top_y[np.newaxis, :] 195 | l_bottom_y = l_bottom_y[np.newaxis, :] 196 | 197 | ind_lx = (center_x - l_left_x) > 0 198 | ind_rx = (center_x - l_right_x) < 0 199 | ind_ty = (center_y - l_top_y) > 0 200 | ind_by = (center_y - l_bottom_y) < 0 201 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 202 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), axis=0) == 1 203 | index_l_new_score = np.argmax( 204 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) 205 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[index_l_new_score, 3]) / 3 206 | 207 | detections = np.concatenate([l_detections, s_detections], axis=0) 208 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores 209 | classes = detections[..., -1] 210 | 211 | # reject detections with negative scores 212 | keep_inds = (detections[:, 4] > -1) 213 | detections = detections[keep_inds] 214 | classes = classes[keep_inds] 215 | 216 | # soft_nms 217 | top_bboxes = {} 218 | for j in range(categories): 219 | keep_inds = (classes == j) 220 | top_bboxes[j + 1] = detections[keep_inds][:, 0:7].astype(np.float32) 221 | if merge_bbox: 222 | soft_nms_merge(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm, weight_exp=weight_exp) 223 | else: 224 | soft_nms(top_bboxes[j + 1], Nt=nms_threshold, method=nms_algorithm) 225 | top_bboxes[j + 1] = top_bboxes[j + 1][:, 0:5] 226 | 227 | scores = np.hstack([top_bboxes[j][:, -1] for j in 
range(1, categories + 1)]) 228 | # select boxes 229 | if len(scores) > max_per_image: 230 | kth = len(scores) - max_per_image 231 | thresh = np.partition(scores, kth)[kth] 232 | for j in range(1, categories + 1): 233 | keep_inds = (top_bboxes[j][:, -1] >= thresh) 234 | top_bboxes[j] = top_bboxes[j][keep_inds] 235 | 236 | return top_bboxes -------------------------------------------------------------------------------- /test/detector.py: -------------------------------------------------------------------------------- 1 | from .base import Base, load_cfg, load_nnet 2 | from config import system_configs 3 | from db.datasets import datasets 4 | import pkg_resources 5 | import importlib 6 | import os 7 | 8 | _package_name = __name__ 9 | 10 | 11 | def get_file_path(*paths): 12 | path = "/".join(paths) 13 | return pkg_resources.resource_filename(_package_name, path) 14 | 15 | 16 | class CenterNet(Base): 17 | def __init__(self, cfg_file, iter=10000, suffix=None): 18 | from test.centernet import inference 19 | 20 | model = importlib.import_module('models.%s'%cfg_file).model 21 | if suffix is None: 22 | cfg_path = os.path.join(system_configs.config_dir, "%s.json" % cfg_file) 23 | else: 24 | cfg_path = os.path.join(system_configs.config_dir, "%s-%s.json" % (cfg_file, suffix)) 25 | model_path = get_file_path("..", "cache", "nnet", cfg_file, "%s_%d.pkl" % (cfg_file, iter)) 26 | cfg_sys, cfg_db = load_cfg(cfg_path) 27 | cfg_sys["snapshot_name"] = cfg_file 28 | system_configs.update_config(cfg_sys) 29 | dataset = system_configs.dataset 30 | train_split = system_configs.train_split 31 | val_split = system_configs.val_split 32 | test_split = system_configs.test_split 33 | 34 | split = { 35 | "training": train_split, 36 | "validation": val_split, 37 | "testing": test_split 38 | }["validation"] 39 | 40 | demo = datasets[dataset](cfg_db, split) 41 | 42 | centernet = load_nnet(demo) 43 | super(CenterNet, self).__init__(demo, centernet, inference, model=model_path) 44 | 45 | 46 | -------------------------------------------------------------------------------- /test/pedestrian.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import pdb 4 | import json 5 | import copy 6 | import numpy as np 7 | import torch 8 | 9 | from PIL import Image, ImageDraw, ImageFont 10 | import matplotlib.pyplot as plt 11 | import matplotlib 12 | 13 | from tqdm import tqdm 14 | from config import system_configs 15 | from utils import crop_image, normalize_ 16 | from external.nms import soft_nms, soft_nms_merge 17 | 18 | colours = np.random.rand(80, 3) 19 | 20 | 21 | def _rescale_dets(detections, ratios, borders, sizes): 22 | xs, ys = detections[..., 0:4:2], detections[..., 1:4:2] 23 | xs /= ratios[:, 1][:, None, None] 24 | ys /= ratios[:, 0][:, None, None] 25 | xs -= borders[:, 2][:, None, None] 26 | ys -= borders[:, 0][:, None, None] 27 | tx_inds = xs[:, :, 0] <= -5 28 | bx_inds = xs[:, :, 1] >= sizes[0, 1] + 5 29 | ty_inds = ys[:, :, 0] <= -5 30 | by_inds = ys[:, :, 1] >= sizes[0, 0] + 5 31 | 32 | np.clip(xs, 0, sizes[:, 1][:, None, None], out=xs) 33 | np.clip(ys, 0, sizes[:, 0][:, None, None], out=ys) 34 | detections[:, tx_inds[0, :], 4] = -1 35 | detections[:, bx_inds[0, :], 4] = -1 36 | detections[:, ty_inds[0, :], 4] = -1 37 | detections[:, by_inds[0, :], 4] = -1 38 | 39 | 40 | def save_image(data, fn): 41 | sizes = np.shape(data) 42 | height = float(sizes[0]) 43 | width = float(sizes[1]) 44 | 45 | fig = plt.figure() 46 | fig.set_size_inches(width / height, 1, 
forward=False) 47 | ax = plt.Axes(fig, [0., 0., 1., 1.]) 48 | ax.set_axis_off() 49 | fig.add_axes(ax) 50 | 51 | ax.imshow(data) 52 | plt.savefig(fn, dpi=height) 53 | plt.close() 54 | 55 | 56 | def kp_decode(nnet, images, K, ae_threshold=0.5, kernel=3): 57 | detections, center = nnet.test([images], ae_threshold=ae_threshold, K=K, kernel=kernel) 58 | detections = detections.data.cpu().numpy() 59 | center = center.data.cpu().numpy() 60 | return detections, center 61 | 62 | 63 | def kp_detection(db, nnet, result_dir, debug=False, decode_func=kp_decode): 64 | debug_dir = os.path.join(result_dir, "debug") 65 | if not os.path.exists(debug_dir): 66 | os.makedirs(debug_dir) 67 | 68 | if db.split != "trainval": 69 | db_inds = db.db_inds[:100] if debug else db.db_inds 70 | else: 71 | db_inds = db.db_inds[:100] if debug else db.db_inds[:5000] 72 | num_images = db_inds.size 73 | 74 | K = db.configs["top_k"] 75 | ae_threshold = db.configs["ae_threshold"] # group corners 76 | nms_kernel = db.configs["nms_kernel"] # nms for corners 77 | 78 | scales = db.configs["test_scales"] 79 | weight_exp = db.configs["weight_exp"] # for softnms 80 | merge_bbox = db.configs["merge_bbox"] 81 | categories = db.configs["categories"] 82 | nms_threshold = db.configs["nms_threshold"] 83 | max_per_image = db.configs["max_per_image"] 84 | nms_algorithm = { 85 | "nms": 0, 86 | "linear_soft_nms": 1, 87 | "exp_soft_nms": 2 88 | }[db.configs["nms_algorithm"]] 89 | 90 | top_bboxes = {} 91 | for ind in tqdm(range(0, num_images), ncols=80, desc="locating kps"): 92 | db_ind = db_inds[ind] 93 | 94 | image_id = db.image_ids(db_ind) 95 | image_file = db.image_file(db_ind) 96 | image = cv2.imread(image_file) 97 | 98 | height, width = image.shape[0:2] 99 | 100 | detections = [] 101 | center_points = [] 102 | 103 | for scale in scales: 104 | new_height = int(height * scale) 105 | new_width = int(width * scale) 106 | new_center = np.array([new_height // 2, new_width // 2]) 107 | 108 | inp_height = new_height | 127 109 | inp_width = new_width | 127 110 | 111 | images = np.zeros((1, 3, inp_height, inp_width), dtype=np.float32) 112 | ratios = np.zeros((1, 2), dtype=np.float32) 113 | borders = np.zeros((1, 4), dtype=np.float32) 114 | sizes = np.zeros((1, 2), dtype=np.float32) 115 | 116 | out_height, out_width = (inp_height + 1) // 4, (inp_width + 1) // 4 117 | height_ratio = out_height / inp_height 118 | width_ratio = out_width / inp_width 119 | 120 | resized_image = cv2.resize(image, (new_width, new_height)) 121 | resized_image, border, offset = crop_image(resized_image, new_center, [inp_height, inp_width]) 122 | 123 | resized_image = resized_image / 255. 
124 | normalize_(resized_image, db.mean, db.std) 125 | 126 | images[0] = resized_image.transpose((2, 0, 1)) 127 | borders[0] = border 128 | sizes[0] = [int(height * scale), int(width * scale)] 129 | ratios[0] = [height_ratio, width_ratio] 130 | 131 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 132 | images = torch.from_numpy(images) 133 | dets, center = decode_func(nnet, images, K, ae_threshold=ae_threshold, kernel=nms_kernel) 134 | dets = dets.reshape(2, -1, 8) # bboxes, scores, tl_scores, br_scores, clses 135 | # here 2 cause the flip result 136 | center = center.reshape(2, -1, 4) # ct_xs, ct_ys, ct_clses, ct_scores 137 | dets[1, :, [0, 2]] = out_width - dets[1, :, [2, 0]] # flip 138 | center[1, :, [0]] = out_width - center[1, :, [0]] # horizontal flip 139 | dets = dets.reshape(1, -1, 8) 140 | center = center.reshape(1, -1, 4) 141 | 142 | _rescale_dets(dets, ratios, borders, sizes) 143 | center[..., [0]] /= ratios[:, 1][:, None, None] # remap to origin image 144 | center[..., [1]] /= ratios[:, 0][:, None, None] 145 | center[..., [0]] -= borders[:, 2][:, None, None] 146 | center[..., [1]] -= borders[:, 0][:, None, None] 147 | np.clip(center[..., [0]], 0, sizes[:, 1][:, None, None], out=center[..., [0]]) 148 | np.clip(center[..., [1]], 0, sizes[:, 0][:, None, None], out=center[..., [1]]) 149 | dets[:, :, 0:4] /= scale 150 | center[:, :, 0:2] /= scale # remap to origin image 151 | 152 | if scale == 1: 153 | center_points.append(center) 154 | detections.append(dets) 155 | 156 | detections = np.concatenate(detections, axis=1) 157 | center_points = np.concatenate(center_points, axis=1) 158 | 159 | classes = detections[..., -1] 160 | classes = classes[0] 161 | detections = detections[0] # N x 8 162 | center_points = center_points[0] # N x 4 163 | 164 | valid_ind = detections[:, 4] > -1 165 | valid_detections = detections[valid_ind] 166 | 167 | box_width = valid_detections[:, 2] - valid_detections[:, 0] 168 | box_height = valid_detections[:, 3] - valid_detections[:, 1] 169 | 170 | s_ind = (box_width * box_height <= 22500) 171 | l_ind = (box_width * box_height > 22500) 172 | 173 | s_detections = valid_detections[s_ind] 174 | l_detections = valid_detections[l_ind] 175 | # trisection 176 | s_left_x = (2 * s_detections[:, 0] + s_detections[:, 2]) / 3 # x + (y-x)/3 177 | s_right_x = (s_detections[:, 0] + 2 * s_detections[:, 2]) / 3 # x +2(y-x)/3 178 | s_top_y = (2 * s_detections[:, 1] + s_detections[:, 3]) / 3 179 | s_bottom_y = (s_detections[:, 1] + 2 * s_detections[:, 3]) / 3 180 | 181 | s_temp_score = copy.copy(s_detections[:, 4]) 182 | s_detections[:, 4] = -1 183 | 184 | center_x = center_points[:, 0][:, np.newaxis] 185 | center_y = center_points[:, 1][:, np.newaxis] 186 | s_left_x = s_left_x[np.newaxis, :] 187 | s_right_x = s_right_x[np.newaxis, :] 188 | s_top_y = s_top_y[np.newaxis, :] 189 | s_bottom_y = s_bottom_y[np.newaxis, :] 190 | # located in center region 191 | ind_lx = (center_x - s_left_x) > 0 192 | ind_rx = (center_x - s_right_x) < 0 193 | ind_ty = (center_y - s_top_y) > 0 194 | ind_by = (center_y - s_bottom_y) < 0 195 | # same classes 196 | ind_cls = (center_points[:, 2][:, np.newaxis] - s_detections[:, -1][np.newaxis, :]) == 0 197 | ind_s_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), 198 | axis=0) == 1 199 | index_s_new_score = np.argmax( 200 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_s_new_score], 201 | axis=0) # select the box having center located in the center region 
202 | s_detections[:, 4][ind_s_new_score] = (s_temp_score[ind_s_new_score] * 2 + center_points[ 203 | index_s_new_score, 3]) / 3 204 | 205 | l_left_x = (3 * l_detections[:, 0] + 2 * l_detections[:, 2]) / 5 206 | l_right_x = (2 * l_detections[:, 0] + 3 * l_detections[:, 2]) / 5 207 | l_top_y = (3 * l_detections[:, 1] + 2 * l_detections[:, 3]) / 5 208 | l_bottom_y = (2 * l_detections[:, 1] + 3 * l_detections[:, 3]) / 5 209 | 210 | l_temp_score = copy.copy(l_detections[:, 4]) 211 | l_detections[:, 4] = -1 212 | 213 | center_x = center_points[:, 0][:, np.newaxis] 214 | center_y = center_points[:, 1][:, np.newaxis] 215 | l_left_x = l_left_x[np.newaxis, :] 216 | l_right_x = l_right_x[np.newaxis, :] 217 | l_top_y = l_top_y[np.newaxis, :] 218 | l_bottom_y = l_bottom_y[np.newaxis, :] 219 | 220 | ind_lx = (center_x - l_left_x) > 0 221 | ind_rx = (center_x - l_right_x) < 0 222 | ind_ty = (center_y - l_top_y) > 0 223 | ind_by = (center_y - l_bottom_y) < 0 224 | ind_cls = (center_points[:, 2][:, np.newaxis] - l_detections[:, -1][np.newaxis, :]) == 0 225 | ind_l_new_score = np.max(((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0)), 226 | axis=0) == 1 227 | index_l_new_score = np.argmax( 228 | ((ind_lx + 0) & (ind_rx + 0) & (ind_ty + 0) & (ind_by + 0) & (ind_cls + 0))[:, ind_l_new_score], axis=0) 229 | l_detections[:, 4][ind_l_new_score] = (l_temp_score[ind_l_new_score] * 2 + center_points[ 230 | index_l_new_score, 3]) / 3 231 | 232 | detections = np.concatenate([l_detections, s_detections], axis=0) 233 | detections = detections[np.argsort(-detections[:, 4])] # resort according to new scores 234 | classes = detections[..., -1] 235 | 236 | # for i in range(detections.shape[0]): 237 | # box_width = detections[i,2]-detections[i,0] 238 | # box_height = detections[i,3]-detections[i,1] 239 | # if box_width*box_height<=22500 and detections[i,4]!=-1: 240 | # left_x = (2*detections[i,0]+1*detections[i,2])/3 241 | # right_x = (1*detections[i,0]+2*detections[i,2])/3 242 | # top_y = (2*detections[i,1]+1*detections[i,3])/3 243 | # bottom_y = (1*detections[i,1]+2*detections[i,3])/3 244 | # temp_score = copy.copy(detections[i,4]) 245 | # detections[i,4] = -1 246 | # for j in range(center_points.shape[0]): 247 | # if (classes[i] == center_points[j,2])and \ 248 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ 249 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): 250 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3 251 | # break 252 | # elif box_width*box_height > 22500 and detections[i,4]!=-1: 253 | # left_x = (3*detections[i,0]+2*detections[i,2])/5 254 | # right_x = (2*detections[i,0]+3*detections[i,2])/5 255 | # top_y = (3*detections[i,1]+2*detections[i,3])/5 256 | # bottom_y = (2*detections[i,1]+3*detections[i,3])/5 257 | # temp_score = copy.copy(detections[i,4]) 258 | # detections[i,4] = -1 259 | # for j in range(center_points.shape[0]): 260 | # if (classes[i] == center_points[j,2])and \ 261 | # (center_points[j,0]>left_x and center_points[j,0]< right_x) and \ 262 | # ((center_points[j,1]>top_y and center_points[j,1]< bottom_y)): 263 | # detections[i,4] = (temp_score*2 + center_points[j,3])/3 264 | # break 265 | 266 | # reject detections with negative scores 267 | keep_inds = (detections[:, 4] > -1) 268 | detections = detections[keep_inds] 269 | classes = classes[keep_inds] 270 | 271 | # soft_nms 272 | top_bboxes[image_id] = {} 273 | for j in range(categories): 274 | keep_inds = (classes == j) 275 | top_bboxes[image_id][j + 1] = 
detections[keep_inds][:, 0:7].astype(np.float32) 276 | if merge_bbox: 277 | soft_nms_merge(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm, 278 | weight_exp=weight_exp) 279 | else: 280 | soft_nms(top_bboxes[image_id][j + 1], Nt=nms_threshold, method=nms_algorithm) 281 | top_bboxes[image_id][j + 1] = top_bboxes[image_id][j + 1][:, 0:5] # N x 5 282 | 283 | scores = np.hstack([ 284 | top_bboxes[image_id][j][:, -1] 285 | for j in range(1, categories + 1) 286 | ]) 287 | 288 | # top_box is a dict {each image result is still a dict contains results in each class} 289 | # select boxes 290 | if len(scores) > max_per_image: 291 | kth = len(scores) - max_per_image 292 | thresh = np.partition(scores, kth)[kth] 293 | for j in range(1, categories + 1): 294 | keep_inds = (top_bboxes[image_id][j][:, -1] >= thresh) 295 | top_bboxes[image_id][j] = top_bboxes[image_id][j][keep_inds] 296 | 297 | if debug: 298 | image_file = db.image_file(db_ind) 299 | image = cv2.imread(image_file) 300 | im = image[:, :, (2, 1, 0)] 301 | fig, ax = plt.subplots(figsize=(12, 12)) 302 | fig = ax.imshow(im, aspect='equal') 303 | plt.axis('off') 304 | fig.axes.get_xaxis().set_visible(False) 305 | fig.axes.get_yaxis().set_visible(False) 306 | # bboxes = {} 307 | for j in range(1, categories + 1): 308 | keep_inds = (top_bboxes[image_id][j][:, -1] >= 0.4) 309 | cat_name = db.class_name(j) 310 | for bbox in top_bboxes[image_id][j][keep_inds]: 311 | bbox = bbox[0:4].astype(np.int32) 312 | xmin = bbox[0] 313 | ymin = bbox[1] 314 | xmax = bbox[2] 315 | ymax = bbox[3] 316 | # if (xmax - xmin) * (ymax - ymin) > 5184: 317 | ax.add_patch( 318 | plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=colours[j - 1], 319 | linewidth=4.0)) 320 | ax.text(xmin + 1, ymin - 3, '{:s}'.format(cat_name), 321 | bbox=dict(facecolor=colours[j - 1], ec='black', lw=2, alpha=0.5), 322 | fontsize=15, color='white', weight='bold') 323 | 324 | debug_file1 = os.path.join(debug_dir, "{}.pdf".format(db_ind)) 325 | debug_file2 = os.path.join(debug_dir, "{}.jpg".format(db_ind)) 326 | plt.savefig(debug_file1) 327 | plt.savefig(debug_file2) 328 | plt.close() 329 | # cv2.imwrite(debug_file, image, [int(cv2.IMWRITE_JPEG_QUALITY), 100]) 330 | 331 | result_json = os.path.join(result_dir, "results.json") 332 | detections = db.convert_to_dict(top_bboxes) 333 | # with open(result_json, "w") as f: 334 | # json.dump(detections, f) 335 | 336 | db.evaluate(detections) 337 | return 0 338 | 339 | 340 | def testing(db, nnet, result_dir, debug=False): 341 | return globals()[system_configs.sampling_function](db, nnet, result_dir, debug=debug) 342 | -------------------------------------------------------------------------------- /test/vis_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def draw_bboxes(image, bboxes, font_size=0.5, thresh=0.5, colors=None): 5 | """Draws bounding boxes on an image. 6 | 7 | Args: 8 | image: An image in OpenCV format 9 | bboxes: A dictionary representing bounding boxes of different object 10 | categories, where the keys are the names of the categories and the 11 | values are the bounding boxes. The bounding boxes of category should be 12 | stored in a 2D NumPy array, where each row is a bounding box (x1, y1, 13 | x2, y2, score). 14 | font_size: (Optional) Font size of the category names. 15 | thresh: (Optional) Only bounding boxes with scores above the threshold 16 | will be drawn. 
17 | colors: (Optional) Color of bounding boxes for each category. If it is 18 | not provided, this function will use random color for each category. 19 | 20 | Returns: 21 | An image with bounding boxes. 22 | """ 23 | 24 | image = image.copy() 25 | for cat_name in bboxes: 26 | keep_inds = bboxes[cat_name][:, -1] > thresh 27 | cat_size = cv2.getTextSize(cat_name, cv2.FONT_HERSHEY_SIMPLEX, font_size, 2)[0] 28 | 29 | if colors is None: 30 | color = np.random.random((3, )) * 0.6 + 0.4 31 | color = (color * 255).astype(np.int32).tolist() 32 | else: 33 | color = colors[cat_name] 34 | 35 | for bbox in bboxes[cat_name][keep_inds]: 36 | bbox = bbox[0:4].astype(np.int32) 37 | if bbox[1] - cat_size[1] - 2 < 0: 38 | cv2.rectangle(image, 39 | (bbox[0], bbox[1] + 2), 40 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 41 | color, -1 42 | ) 43 | cv2.putText(image, cat_name, 44 | (bbox[0], bbox[1] + cat_size[1] + 2), 45 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 46 | ) 47 | else: 48 | cv2.rectangle(image, 49 | (bbox[0], bbox[1] - cat_size[1] - 2), 50 | (bbox[0] + cat_size[0], bbox[1] - 2), 51 | color, -1 52 | ) 53 | cv2.putText(image, cat_name, 54 | (bbox[0], bbox[1] - 2), 55 | cv2.FONT_HERSHEY_SIMPLEX, font_size, (0, 0, 0), thickness=1 56 | ) 57 | cv2.rectangle(image, 58 | (bbox[0], bbox[1]), 59 | (bbox[2], bbox[3]), 60 | color, 2 61 | ) 62 | return image 63 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | import os 3 | import json 4 | import torch 5 | import queue 6 | import pprint 7 | import argparse 8 | import importlib 9 | import threading 10 | import traceback 11 | import numpy as np 12 | 13 | from tqdm import tqdm 14 | from utils import stdout_to_tqdm 15 | from db.datasets import datasets 16 | from config import system_configs 17 | from nnet.py_factory import NetworkFactory 18 | from torch.multiprocessing import Process, Queue 19 | 20 | os.environ["CUDA_VISIBLE_DEVICES"] = '0, 1' 21 | torch.backends.cudnn.enabled = True 22 | torch.backends.cudnn.benchmark = True 23 | 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description="Train CenterNet") 27 | parser.add_argument("--cfg_file", default='CenterNet-52', help="config file", type=str) 28 | parser.add_argument("--iter", dest="start_iter", 29 | help="train at iteration i", 30 | default=0, type=int) 31 | parser.add_argument("--threads", dest="threads", default=16, type=int) 32 | 33 | #args = parser.parse_args() 34 | args, unparsed = parser.parse_known_args() 35 | return args 36 | 37 | def prefetch_data(db, queue, sample_data, data_aug): 38 | ind = 0 39 | print("start prefetching data...") 40 | np.random.seed(os.getpid()) 41 | while True: 42 | try: 43 | data, ind = sample_data(db, ind, data_aug=data_aug) 44 | queue.put(data) 45 | except Exception as e: 46 | traceback.print_exc() 47 | raise e 48 | 49 | def pin_memory(data_queue, pinned_data_queue, sema): 50 | while True: 51 | data = data_queue.get() 52 | data["xs"] = [x.pin_memory() for x in data["xs"]] 53 | data["ys"] = [y.pin_memory() for y in data["ys"]] 54 | 55 | pinned_data_queue.put(data) 56 | if sema.acquire(blocking=False): 57 | return 58 | 59 | 60 | def init_parallel_jobs(dbs, queue, fn, data_aug): 61 | tasks = [Process(target=prefetch_data, args=(db, queue, fn, data_aug)) for db in dbs] 62 | for task in tasks: 63 | task.daemon = True 64 | task.start() 65 | return tasks 66 | 67 | 68 | def 
train(training_dbs, validation_db, start_iter=0): 69 | learning_rate = system_configs.learning_rate 70 | max_iteration = system_configs.max_iter 71 | pretrained_model = system_configs.pretrain 72 | snapshot = system_configs.snapshot 73 | val_iter = system_configs.val_iter 74 | display = system_configs.display 75 | decay_rate = system_configs.decay_rate 76 | stepsize = system_configs.stepsize 77 | 78 | training_size = len(training_dbs[0].db_inds) 79 | validation_size = len(validation_db.db_inds) 80 | 81 | # queues storing data for training 82 | training_queue = Queue(system_configs.prefetch_size) # buffer size of prefetch data 83 | validation_queue = Queue(5) 84 | 85 | # queues storing pinned data for training 86 | pinned_training_queue = queue.Queue(system_configs.prefetch_size) 87 | pinned_validation_queue = queue.Queue(5) 88 | 89 | # load data sampling function 90 | data_file = "sample.{}".format(training_dbs[0].data) 91 | sample_data = importlib.import_module(data_file).sample_data 92 | 93 | #allocate resources for parallel reading 94 | training_tasks = init_parallel_jobs(training_dbs, training_queue, sample_data, True) 95 | if val_iter: 96 | validation_tasks = init_parallel_jobs([validation_db], validation_queue, sample_data, False) 97 | 98 | training_pin_semaphore = threading.Semaphore() 99 | validation_pin_semaphore = threading.Semaphore() 100 | training_pin_semaphore.acquire() 101 | validation_pin_semaphore.acquire() 102 | 103 | training_pin_args = (training_queue, pinned_training_queue, training_pin_semaphore) 104 | training_pin_thread = threading.Thread(target=pin_memory, args=training_pin_args) 105 | training_pin_thread.daemon = True 106 | training_pin_thread.start() 107 | 108 | validation_pin_args = (validation_queue, pinned_validation_queue, validation_pin_semaphore) 109 | validation_pin_thread = threading.Thread(target=pin_memory, args=validation_pin_args) 110 | validation_pin_thread.daemon = True 111 | validation_pin_thread.start() 112 | 113 | print("building model...") 114 | nnet = NetworkFactory(training_dbs[0]) 115 | 116 | 117 | if pretrained_model is not None: 118 | if not os.path.exists(pretrained_model): 119 | raise ValueError("pretrained model does not exist") 120 | print("loading from pretrained model") 121 | nnet.load_pretrained_params(pretrained_model) 122 | 123 | if start_iter: 124 | learning_rate /= (decay_rate ** (start_iter // stepsize)) 125 | 126 | nnet.load_params(start_iter) 127 | nnet.set_lr(learning_rate) 128 | print("training starts from iteration {} with learning_rate {}".format(start_iter + 1, learning_rate)) 129 | else: 130 | nnet.set_lr(learning_rate) 131 | 132 | print("training start...") 133 | nnet.cuda() 134 | nnet.train_mode() 135 | 136 | with stdout_to_tqdm() as save_stdout: 137 | for iteration in tqdm(range(start_iter+1, max_iteration+1), file=save_stdout, ncols=80): 138 | training = pinned_training_queue.get(block=True) 139 | training_loss, focal_loss, pull_loss, push_loss, regr_loss = nnet.train(**training) 140 | 141 | if display and iteration % display == 0: 142 | print("training loss at iteration {}: {}".format(iteration, training_loss.item())) 143 | print("focal loss at iteration {}: {}".format(iteration, focal_loss.item())) 144 | print("pull loss at iteration {}: {}".format(iteration, pull_loss.item())) 145 | print("push loss at iteration {}: {}".format(iteration, push_loss.item())) 146 | print("regr loss at iteration {}: {}".format(iteration, regr_loss.item())) 147 | 148 | del training_loss, focal_loss, pull_loss, push_loss, regr_loss 149 
|
150 |             if val_iter and validation_db.db_inds.size and iteration % val_iter == 0:
151 |                 nnet.eval_mode()
152 |                 validation = pinned_validation_queue.get(block=True)
153 |                 validation_loss = nnet.validate(**validation)
154 |                 print("validation loss at iteration {}: {}".format(iteration, validation_loss.item()))
155 |                 nnet.train_mode()
156 | 
157 |             if iteration % snapshot == 0:
158 |                 nnet.save_params(iteration)
159 | 
160 |             if iteration % stepsize == 0:
161 |                 learning_rate /= decay_rate
162 |                 nnet.set_lr(learning_rate)
163 | 
164 |     # sending signal to kill the pin-memory threads
165 |     training_pin_semaphore.release()
166 |     validation_pin_semaphore.release()
167 | 
168 |     # terminating data fetching processes
169 |     for training_task in training_tasks:
170 |         training_task.terminate()
171 |     if val_iter:  # validation_tasks only exists when validation is enabled, so guard against a NameError
172 |         for validation_task in validation_tasks:
173 |             validation_task.terminate()
174 | 
175 | 
176 | if __name__ == "__main__":
177 |     args = parse_args()
178 |     cfg_file = os.path.join(system_configs.config_dir, args.cfg_file + ".json")
179 |     with open(cfg_file, "r") as f:
180 |         configs = json.load(f)
181 | 
182 |     configs["system"]["snapshot_name"] = args.cfg_file
183 |     system_configs.update_config(configs["system"])
184 | 
185 |     train_split = system_configs.train_split
186 |     val_split = system_configs.val_split
187 |     print("loading all datasets ...")
188 |     dataset = system_configs.dataset
189 |     threads = args.threads
190 | 
191 |     print("using {} threads".format(threads))
192 |     training_dbs = [datasets[dataset](configs["db"], train_split) for _ in range(threads)]
193 |     validation_db = datasets[dataset](configs["db"], val_split)
194 | 
195 |     print("system config ...")
196 |     pprint.pprint(system_configs.full)
197 | 
198 |     print("db config...")
199 |     pprint.pprint(training_dbs[0].configs)
200 | 
201 |     print("len of db: {}".format(len(training_dbs[0].db_inds)))
202 |     train(training_dbs, validation_db, args.start_iter)
203 | 
204 | 
-------------------------------------------------------------------------------- /utils/__init__.py: --------------------------------------------------------------------------------
1 | from .tqdm import stdout_to_tqdm
2 | 
3 | from .image import crop_image
4 | from .image import color_jittering_, lighting_, normalize_
5 | 
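These re-exports are the public face of `utils`, and together they form the photometric half of the training pipeline defined in `utils/image.py` below. A sketch of a typical per-image chain follows; the mean/std and PCA lighting eigen values are illustrative stand-ins for the statistics the dataset object actually carries:

```python
# Sketch: training-time photometric augmentation with the helpers re-exported above.
import cv2
import numpy as np

rng = np.random.RandomState(123)
image = cv2.imread("data/demo/000001.jpg").astype(np.float32) / 255.0

mean = np.array([0.408, 0.447, 0.470], dtype=np.float32)   # assumed BGR statistics
std = np.array([0.289, 0.274, 0.278], dtype=np.float32)
eig_val = np.array([0.2141788, 0.01817699, 0.00341571], dtype=np.float32)
eig_vec = np.array([[-0.58752847, -0.69563484, 0.41340352],
                    [-0.5832747, 0.00994535, -0.81221408],
                    [-0.56089297, 0.71832671, 0.41158938]], dtype=np.float32)

color_jittering_(rng, image)                   # in-place brightness/contrast/saturation
lighting_(rng, image, 0.1, eig_val, eig_vec)   # PCA lighting noise
normalize_(image, mean, std)                   # zero-mean, unit-variance per channel
```

All three helpers mutate the float image in place, which is why the sampler converts to `float32` before calling them.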
-------------------------------------------------------------------------------- /utils/image.py: --------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import random
4 | 
5 | def grayscale(image):
6 |     return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
7 | 
8 | def normalize_(image, mean, std):
9 |     image -= mean
10 |     image /= std
11 | 
12 | def lighting_(data_rng, image, alphastd, eigval, eigvec):
13 |     alpha = data_rng.normal(scale=alphastd, size=(3, ))
14 |     image += np.dot(eigvec, eigval * alpha)
15 | 
16 | def blend_(alpha, image1, image2):
17 |     image1 *= alpha
18 |     image2 *= (1 - alpha)
19 |     image1 += image2
20 | 
21 | def saturation_(data_rng, image, gs, gs_mean, var):
22 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
23 |     blend_(alpha, image, gs[:, :, None])
24 | 
25 | def brightness_(data_rng, image, gs, gs_mean, var):
26 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
27 |     image *= alpha
28 | 
29 | def contrast_(data_rng, image, gs, gs_mean, var):
30 |     alpha = 1. + data_rng.uniform(low=-var, high=var)
31 |     blend_(alpha, image, gs_mean)
32 | 
33 | def color_jittering_(data_rng, image):
34 |     functions = [brightness_, contrast_, saturation_]
35 |     random.shuffle(functions)
36 | 
37 |     gs = grayscale(image)
38 |     gs_mean = gs.mean()
39 |     for f in functions:
40 |         f(data_rng, image, gs, gs_mean, 0.4)
41 | 
42 | def crop_image(image, center, size):
43 |     cty, ctx = center
44 |     height, width = size
45 |     im_height, im_width = image.shape[0:2]
46 |     cropped_image = np.zeros((height, width, 3), dtype=image.dtype)
47 | 
48 |     x0, x1 = max(0, ctx - width // 2), min(ctx + width // 2, im_width)
49 |     y0, y1 = max(0, cty - height // 2), min(cty + height // 2, im_height)
50 | 
51 |     left, right = ctx - x0, x1 - ctx
52 |     top, bottom = cty - y0, y1 - cty
53 | 
54 |     cropped_cty, cropped_ctx = height // 2, width // 2
55 |     y_slice = slice(cropped_cty - top, cropped_cty + bottom)
56 |     x_slice = slice(cropped_ctx - left, cropped_ctx + right)
57 |     cropped_image[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]
58 | 
59 |     border = np.array([
60 |         cropped_cty - top,
61 |         cropped_cty + bottom,
62 |         cropped_ctx - left,
63 |         cropped_ctx + right
64 |     ], dtype=np.float32)
65 | 
66 |     offset = np.array([
67 |         cty - height // 2,
68 |         ctx - width // 2
69 |     ])
70 | 
71 |     return cropped_image, border, offset
72 | 
-------------------------------------------------------------------------------- /utils/tqdm.py: --------------------------------------------------------------------------------
1 | import sys
2 | import contextlib
3 | 
4 | from tqdm import tqdm
5 | 
6 | class TqdmFile(object):
7 |     dummy_file = None
8 |     def __init__(self, dummy_file):
9 |         self.dummy_file = dummy_file
10 | 
11 |     def write(self, x):
12 |         if len(x.rstrip()) > 0:
13 |             tqdm.write(x, file=self.dummy_file)
14 | 
15 | @contextlib.contextmanager
16 | def stdout_to_tqdm():
17 |     save_stdout = sys.stdout
18 |     try:
19 |         sys.stdout = TqdmFile(sys.stdout)
20 |         yield save_stdout
21 |     except Exception as exc:
22 |         raise exc
23 |     finally:
24 |         sys.stdout = save_stdout
25 | 
--------------------------------------------------------------------------------
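`stdout_to_tqdm` is what lets `train.py` keep printing losses without corrupting the live progress bar: it swaps `sys.stdout` for a `TqdmFile` that routes writes through `tqdm.write`, while yielding the real stdout for the bar itself. A minimal usage sketch, mirroring the pattern in `train.py`:

```python
# Sketch: print() inside a tqdm loop without breaking the progress bar.
from tqdm import tqdm
from utils import stdout_to_tqdm

with stdout_to_tqdm() as save_stdout:
    for i in tqdm(range(100), file=save_stdout, ncols=80):
        if i % 25 == 0:
            print("checkpoint at step {}".format(i))  # routed through tqdm.write
```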