├── LICENSE
├── README.md
├── data
│   └── .gitignore
├── exp
│   └── .gitignore
├── experiments
│   └── ctdet_coco_hg.sh
├── models
│   └── .gitignore
└── src
    ├── _init_paths.py
    ├── apply_prior.py
    ├── lib
    │   ├── datasets
    │   │   ├── dataset
    │   │   │   ├── coco.py
    │   │   │   └── coco_hp.py
    │   │   ├── dataset_factory.py
    │   │   └── sample
    │   │       └── ctdet.py
    │   ├── detectors
    │   │   ├── base_detector.py
    │   │   ├── ctdet.py
    │   │   └── detector_factory.py
    │   ├── external
    │   │   ├── .gitignore
    │   │   ├── Makefile
    │   │   ├── __init__.py
    │   │   ├── nms.pyx
    │   │   └── setup.py
    │   ├── logger.py
    │   ├── models
    │   │   ├── data_parallel.py
    │   │   ├── decode.py
    │   │   ├── losses.py
    │   │   ├── model.py
    │   │   ├── networks
    │   │   │   ├── DCNv2
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── LICENSE
    │   │   │   │   ├── README.md
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── build.py
    │   │   │   │   ├── build_double.py
    │   │   │   │   ├── dcn_v2.py
    │   │   │   │   ├── dcn_v2_func.py
    │   │   │   │   ├── make.sh
    │   │   │   │   ├── src
    │   │   │   │   │   ├── cuda
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda.cu
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda.h
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda_double.cu
    │   │   │   │   │   │   ├── dcn_v2_im2col_cuda_double.h
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.cu
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda.h
    │   │   │   │   │   │   ├── dcn_v2_psroi_pooling_cuda_double.cu
    │   │   │   │   │   │   └── dcn_v2_psroi_pooling_cuda_double.h
    │   │   │   │   │   ├── dcn_v2.c
    │   │   │   │   │   ├── dcn_v2.h
    │   │   │   │   │   ├── dcn_v2_cuda.c
    │   │   │   │   │   ├── dcn_v2_cuda.h
    │   │   │   │   │   ├── dcn_v2_cuda_double.c
    │   │   │   │   │   ├── dcn_v2_cuda_double.h
    │   │   │   │   │   ├── dcn_v2_double.c
    │   │   │   │   │   └── dcn_v2_double.h
    │   │   │   │   └── test.py
    │   │   │   ├── dlav0.py
    │   │   │   ├── large_hourglass.py
    │   │   │   ├── msra_resnet.py
    │   │   │   ├── pose_dla_dcn.py
    │   │   │   ├── py_utils
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── _cpools
    │   │   │   │   │   ├── .gitignore
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── setup.py
    │   │   │   │   │   └── src
    │   │   │   │   │       ├── bottom_pool.cpp
    │   │   │   │   │       ├── left_pool.cpp
    │   │   │   │   │       ├── right_pool.cpp
    │   │   │   │   │       └── top_pool.cpp
    │   │   │   │   ├── data_parallel.py
    │   │   │   │   ├── losses.py
    │   │   │   │   ├── modules.py
    │   │   │   │   ├── scatter_gather.py
    │   │   │   │   └── utils.py
    │   │   │   └── resnet_dcn.py
    │   │   ├── scatter_gather.py
    │   │   └── utils.py
    │   ├── opts.py
    │   ├── trains
    │   │   ├── base_trainer.py
    │   │   ├── ctdet.py
    │   │   └── train_factory.py
    │   └── utils
    │       ├── __init__.py
    │       ├── ddd_utils.py
    │       ├── debugger.py
    │       ├── image.py
    │       ├── oracle_utils.py
    │       ├── post_process.py
    │       └── utils.py
    ├── test_HOI.py
    ├── timer.py
    ├── tools
    │   ├── _init_paths.py
    │   ├── calc_coco_overlap.py
    │   ├── convert_hourglass_weight.py
    │   ├── eval_coco.py
    │   ├── eval_coco_hp.py
    │   ├── reval.py
    │   └── vis_pred.py
    └── vsrl_eval.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 Tiancai Wang
4 | All rights reserved.
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learning Human-Object Interaction Detection using Interaction Points
2 |
3 | Created by Tiancai Wang, Tong Yang, Martin Danelljan, Fahad Shahbaz Khan, Xiangyu Zhang, Jian Sun
4 |
5 | Link for our paper: [arxiv](https://arxiv.org/abs/2003.14023) and [CVPR2020](http://openaccess.thecvf.com/content_CVPR_2020/html/Wang_Learning_Human-Object_Interaction_Detection_Using_Interaction_Points_CVPR_2020_paper.html)
6 |
7 | ### Introduction
8 | Understanding interactions between humans and objects is one of the fundamental problems in visual classification and an essential step towards detailed scene understanding. Human-object interaction (HOI) detection strives to localize both the human and the object as well as to identify the complex interactions between them. Most existing HOI detection approaches are instance-centric, where interactions between all possible human-object pairs are predicted based on appearance features and coarse spatial information. We argue that appearance features alone are insufficient to capture complex human-object interactions. In this paper, we therefore propose a novel fully-convolutional approach that directly detects the interactions between human-object pairs. Our network predicts interaction points, which directly localize and classify the interaction. Paired with the densely predicted interaction vectors, the interactions are associated with human and object detections to obtain final predictions. To the best of our knowledge, we are the first to propose an approach where HOI detection is posed as a keypoint detection and grouping problem. Experiments are performed on two popular benchmarks: V-COCO and HICO-DET. Our approach sets a new state-of-the-art on both datasets.
9 |
10 | ## Installation
11 | - Clone this repository. This repository is mainly based on [CenterNet](https://github.com/xingyizhou/CenterNet) and [iCAN](https://github.com/vt-vl-lab/iCAN).
12 |
13 | ```Shell
14 | IPNet_ROOT=/path/to/clone/IPNet
15 | git clone https://github.com/vaesl/IP-Net $IPNet_ROOT
16 | ```
17 | - The code was tested on Ubuntu 18.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.0.1.
18 | NVIDIA GPUs are needed for testing. After installing Anaconda, create a new conda environment, activate it and install PyTorch 1.0.1.
19 |
20 | ```Shell
21 | conda create -n IPNet python=3.6
22 | source activate IPNet
23 | conda install pytorch=1.0.1 torchvision -c pytorch
24 | ```
25 |
26 | - Install the requirements.
27 | ```Shell
28 | pip3 install -r requirements.txt
29 | ```
30 | - Compile the center pooling layers.
31 | ```Shell
32 | cd IPNet_ROOT/src/lib/models/networks/py_utils/_cpools/
33 | python setup.py install --user
34 | ```
35 |
36 | - Install [COCOAPI](https://github.com/cocodataset/cocoapi):
37 |
38 | ```Shell
39 | COCOAPI=/path/to/clone/cocoapi
40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI
41 | cd $COCOAPI/PythonAPI
42 | make
43 | python setup.py install --user
44 | ```
45 |
46 | ## Download
47 | To evaluate the performance reported in the paper, the V-COCO and HICO-DET datasets as well as our trained models need to be downloaded.
48 |
49 | ### V-COCO and HICO-DET Datasets
50 | To download the datasets and set up the evaluation API, please follow [iCAN](https://github.com/vt-vl-lab/iCAN).
51 |
52 | ### Trained Models
53 | Please access [Google Drive](https://drive.google.com/file/d/1stBqpTncUFfl-naKn4NONRmC-89jtdyh/view?usp=sharing)
54 | to obtain our trained models for V-COCO and put them into the corresponding directory (e.g. '~/weights/V-COCO/').
55 | Note that we only release models for V-COCO for the time being.
56 |
57 | ## Evaluation
58 | To reproduce the performance reported in the paper, simply run:
59 |
60 | ```Shell
61 | python3 test_HOI.py ctdet --exp_id coco_hg --fix_res --arch hourglass --flip_test --load_model /path/to/model/weights
62 | ```
63 |
64 | ## Citation
65 | Please cite our paper in your publications if it helps your research:
66 |
67 | @inproceedings{Wang2020IPNet,
68 | title = {Learning Human-Object Interaction Detection using Interaction Points},
69 | author = {Tiancai Wang and Tong Yang and Martin Danelljan and Fahad Shahbaz Khan and Xiangyu Zhang and Jian Sun},
70 | booktitle = {CVPR},
71 | year = {2020}
72 | }
73 |
--------------------------------------------------------------------------------
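
A note on the grouping step described in the abstract: the network predicts an interaction point (the midpoint between a human center and an object center) together with an interaction vector (the offsets between those centers), and the ground-truth construction in src/lib/datasets/sample/ctdet.py below follows exactly that definition. The repository's actual association code lives in test_HOI.py, which is not reproduced in this section; the snippet below is only a minimal sketch of the pairing idea, with hypothetical names.

```python
import numpy as np

def pair_interaction_point(pt, wh, human_centers, object_centers):
    """Hypothetical helper: match one predicted interaction point to a
    (human, object) detection pair.

    pt -- (x, y) predicted interaction point
    wh -- (w, h) predicted |dx|, |dy| between the two box centers
    """
    best, best_cost = None, np.inf
    for i, h in enumerate(human_centers):
        for j, o in enumerate(object_centers):
            mid = (h + o) / 2.0      # pair's implied interaction point
            span = np.abs(h - o)     # pair's implied interaction vector
            cost = np.linalg.norm(mid - pt) + np.linalg.norm(span - wh)
            if cost < best_cost:
                best, best_cost = (i, j), cost
    return best  # indices of the best-matching human/object detections
```
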
/data/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/exp/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/experiments/ctdet_coco_hg.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd src
3 | # train
4 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4
5 | # test
6 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume
7 | # flip test
8 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test
9 | # multi scale test
10 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5
11 | cd ..
--------------------------------------------------------------------------------
/models/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
3 |
--------------------------------------------------------------------------------
/src/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import sys
3 |
4 | def add_path(path):
5 | if path not in sys.path:
6 | sys.path.insert(0, path)
7 |
8 | this_dir = osp.dirname(__file__)
9 |
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 |
--------------------------------------------------------------------------------
/src/apply_prior.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Tensorflow iCAN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Chen Gao
5 | # --------------------------------------------------------
6 |
7 | def apply_prior(Object, prediction):
8 |
9 |
10 | if Object[4] != 32: # not a snowboard, then the action is impossible to be snowboard
11 | prediction[21] = 0
12 |
13 | if Object[4] != 74: # not a book, then the action is impossible to be read
14 | prediction[24] = 0
15 |
16 | if Object[4] != 33: # not a sports ball, then the action is impossible to be kick
17 | prediction[7] = 0
18 |
19 | if (Object[4] != 41) and (Object[4] != 40) and (Object[4] != 42) and (Object[4] != 46): # not 'wine glass', 'bottle', 'cup', 'bowl', then the action is impossible to be drink
20 | prediction[13] = 0
21 |
22 | if Object[4] != 37: # not a skateboard, then the action is impossible to be skateboard
23 | prediction[26] = 0
24 |
25 | if Object[4] != 38: # not a surfboard, then the action is impossible to be surfboard
26 | prediction[0] = 0
27 |
28 | if Object[4] != 31: # not a ski, then the action is impossible to be ski
29 | prediction[1] = 0
30 |
31 | if Object[4] != 64: # not a laptop, then the action is impossible to be work on computer
32 | prediction[8] = 0
33 |
34 | if (Object[4] != 77) and (Object[4] != 43) and (Object[4] != 44): # not 'scissors', 'fork', 'knife', then the action is impossible to be cut_instr
35 | prediction[2] = 0
36 |
37 | if (Object[4] != 33) and (Object[4] != 30): # not 'sports ball', 'frisbee', then the action is impossible to be throw and catch
38 | prediction[15] = 0
39 | prediction[28] = 0
40 |
41 | if Object[4] != 68: # not a cellphone, then the action is impossible to be talk_on_phone
42 | prediction[6] = 0
43 |
44 | if (Object[4] != 14) and (Object[4] != 61) and (Object[4] != 62) and (Object[4] != 60) and (Object[4] != 58) and (Object[4] != 57): # not 'bench', 'dining table', 'toilet', 'bed', 'couch', 'chair', then the action is impossible to be lay
45 | prediction[12] = 0
46 |
47 | if (Object[4] != 32) and (Object[4] != 31) and (Object[4] != 37) and (Object[4] != 38): # not 'snowboard', 'skis', 'skateboard', 'surfboard', then the action is impossible to be jump
48 | prediction[11] = 0
49 |
50 | if (Object[4] != 47) and (Object[4] != 48) and (Object[4] != 49) and (Object[4] != 50) and (Object[4] != 51) and (Object[4] != 52) and (Object[4] != 53) and (Object[4] != 54) and (Object[4] != 55) and (Object[4] != 56): # not 'banana', 'apple', 'sandwich', 'orange', 'carrot', 'broccoli', 'hot dog', 'pizza', 'cake', 'donut', then the action is impossible to be eat_obj
51 | prediction[9] = 0
52 |
53 | if (Object[4] != 43) and (Object[4] != 44) and (Object[4] != 45): # not 'fork', 'knife', 'spoon', then the action is impossible to be eat_instr
54 | prediction[16] = 0
55 |
56 | if (Object[4] != 39) and (Object[4] != 35): # not 'tennis racket', 'baseball bat', then the action is impossible to be hit_instr
57 | prediction[19] = 0
58 |
59 | if (Object[4] != 33): # not 'sports ball', then the action is impossible to be hit_obj
60 | prediction[20] = 0
61 |
62 |
63 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 6) and (Object[4] != 8) and (Object[4] != 9) and (Object[4] != 7) and (Object[4] != 5) and (Object[4] != 3) and (Object[4] != 18) and (Object[4] != 21): # not 'bicycle', 'motorcycle', 'bus', 'truck', 'boat', 'train', 'airplane', 'car', 'horse', 'elephant', then the action is impossible to be ride
64 | prediction[5] = 0
65 |
66 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 18) and (Object[4] != 21) and (Object[4] != 14) and (Object[4] != 57) and (Object[4] != 58) and (Object[4] != 60) and (Object[4] != 62) and (Object[4] != 61) and (Object[4] != 29) and (Object[4] != 27) and (Object[4] != 25): # not 'bicycle', 'motorcycle', 'horse', 'elephant', 'bench', 'chair', 'couch', 'bed', 'toilet', 'dining table', 'suitcase', 'handbag', 'backpack', then the action is impossible to be sit
67 | prediction[10] = 0
68 |
69 | if (Object[4] == 1):
70 | prediction[4] = 0
71 |
72 | return prediction
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
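
The if-chain in apply_prior encodes, for each V-COCO action index, the set of COCO category ids that can plausibly take part in that action. An equivalent, easier-to-audit table-driven form is sketched below; the action/category pairs are transcribed directly from the conditions above, and the function is a sketch rather than the repository's code.

```python
# V-COCO action index -> COCO category ids for which the action is possible.
# Transcribed from the if-chain in apply_prior above.
ALLOWED = {
    21: {32},                      # snowboard
    24: {74},                      # read (book)
    7:  {33},                      # kick (sports ball)
    13: {40, 41, 42, 46},          # drink
    26: {37},                      # skateboard
    0:  {38},                      # surf
    1:  {31},                      # ski
    8:  {64},                      # work on computer
    2:  {43, 44, 77},              # cut_instr
    15: {30, 33},                  # throw
    28: {30, 33},                  # catch
    6:  {68},                      # talk_on_phone
    12: {14, 57, 58, 60, 61, 62},  # lay
    11: {31, 32, 37, 38},          # jump
    9:  set(range(47, 57)),        # eat_obj (ten food categories)
    16: {43, 44, 45},              # eat_instr
    19: {35, 39},                  # hit_instr
    20: {33},                      # hit_obj
    5:  {2, 3, 4, 5, 6, 7, 8, 9, 18, 21},                    # ride
    10: {2, 4, 14, 18, 21, 25, 27, 29, 57, 58, 60, 61, 62},  # sit
}

def apply_prior_table(obj, prediction):
    """Sketch of apply_prior as a table lookup (same zeroing behaviour)."""
    for act, cats in ALLOWED.items():
        if obj[4] not in cats:
            prediction[act] = 0
    if obj[4] == 1:  # object is a person: action index 4 is suppressed
        prediction[4] = 0
    return prediction
```
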
/src/lib/datasets/dataset/coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | import torch.utils.data as data
12 |
13 |
14 | class COCO(data.Dataset):
15 | num_obj_classes = 80
16 | num_act_classes = 29
17 | default_resolution = [512, 512]
18 | mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32).reshape(1, 1, 3)
19 | std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32).reshape(1, 1, 3)
20 |
21 | def __init__(self, opt, split):
22 | super(COCO, self).__init__()
23 | self.data_dir = os.path.join(opt.data_dir, 'vcoco')
24 | if split == 'test':
25 | self.annot_path = os.path.join(
26 | self.data_dir, 'annotations', 'instances_vcoco_test2014.json')
27 | else:
28 | self.annot_path = os.path.join(
29 | self.data_dir, 'annotations', 'instances_hoi_action_point_iCAN.json')
30 | self.max_objs = 128
31 | self.class_name = [
32 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
33 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
34 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
35 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
36 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
37 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
38 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
39 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
40 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
41 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
42 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
43 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
44 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
45 | self._valid_ids = [
46 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13,
47 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
48 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
49 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47,
50 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
51 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70,
52 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
53 | 82, 84, 85, 86, 87, 88, 89, 90]
54 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)}
55 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \
56 | for v in range(1, self.num_obj_classes + 1)]
57 | self._data_rng = np.random.RandomState(123)
58 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
59 | dtype=np.float32)
60 | self._eig_vec = np.array([
61 | [-0.58752847, -0.69563484, 0.41340352],
62 | [-0.5832747, 0.00994535, -0.81221408],
63 | [-0.56089297, 0.71832671, 0.41158938]
64 | ], dtype=np.float32)
65 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3)
66 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3)
67 |
68 | self.split = split
69 | self.opt = opt
70 |
71 | print('==> initializing coco 2014 {} data.'.format(split))
72 | self.coco = coco.COCO(self.annot_path)
73 | self.images = self.coco.getImgIds()
74 | self.num_samples = len(self.images)
75 |
76 | print('Loaded {} {} samples'.format(split, self.num_samples))
77 |
78 | def _to_float(self, x):
79 | return float("{:.2f}".format(x))
80 |
81 | def convert_eval_format(self, all_bboxes):
82 | # import pdb; pdb.set_trace()
83 | detections = []
84 | for image_id in all_bboxes:
85 | for cls_ind in all_bboxes[image_id]:
86 | category_id = self._valid_ids[cls_ind - 1]
87 | for bbox in all_bboxes[image_id][cls_ind]:
88 | bbox[2] -= bbox[0]
89 | bbox[3] -= bbox[1]
90 | score = bbox[4]
91 | bbox_out = list(map(self._to_float, bbox[0:4]))
92 |
93 | detection = {
94 | "image_id": int(image_id),
95 | "category_id": int(category_id),
96 | "bbox": bbox_out,
97 | "score": float("{:.2f}".format(score))
98 | }
99 | if len(bbox) > 5:
100 | extreme_points = list(map(self._to_float, bbox[5:13]))
101 | detection["extreme_points"] = extreme_points
102 | detections.append(detection)
103 | return detections
104 |
105 | def __len__(self):
106 | return self.num_samples
107 |
108 | def save_results(self, results, save_dir):
109 | json.dump(self.convert_eval_format(results),
110 | open('{}/results.json'.format(save_dir), 'w'))
111 |
112 | def run_eval(self, results, save_dir):
113 | # result_json = os.path.join(save_dir, "results.json")
114 | # detections = self.convert_eval_format(results)
115 | # json.dump(detections, open(result_json, "w"))
116 | self.save_results(results, save_dir)
117 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
118 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
119 | coco_eval.evaluate()
120 | coco_eval.accumulate()
121 | coco_eval.summarize()
122 |
--------------------------------------------------------------------------------
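
COCO category ids are not contiguous (12, 26, 29, ... are unused), which is why the class above keeps two mappings: cat_ids turns a raw category_id into a contiguous training index, and _valid_ids turns a class index back into a COCO id when results are written. A small self-contained illustration:

```python
# Illustration of the id remapping in COCO above (first 12 entries shown).
_valid_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13]
cat_ids = {v: i for i, v in enumerate(_valid_ids)}

print(cat_ids[13])     # 11 -- COCO id 13 ('stop sign') trains as class 11
print(_valid_ids[11])  # 13 -- class index 11 maps back to COCO id 13
# convert_eval_format applies the inverse map as _valid_ids[cls_ind - 1]
# because detector class indices are 1-based at evaluation time.
```
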
/src/lib/datasets/dataset/coco_hp.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import pycocotools.coco as coco
6 | from pycocotools.cocoeval import COCOeval
7 | import numpy as np
8 | import json
9 | import os
10 |
11 | import torch.utils.data as data
12 |
13 | class COCOHP(data.Dataset):
14 | num_classes = 1
15 | num_joints = 17
16 | default_resolution = [512, 512]
17 | mean = np.array([0.40789654, 0.44719302, 0.47026115],
18 | dtype=np.float32).reshape(1, 1, 3)
19 | std = np.array([0.28863828, 0.27408164, 0.27809835],
20 | dtype=np.float32).reshape(1, 1, 3)
21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10],
22 | [11, 12], [13, 14], [15, 16]]
23 | def __init__(self, opt, split):
24 | super(COCOHP, self).__init__()
25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
26 | [4, 6], [3, 5], [5, 6],
27 | [5, 7], [7, 9], [6, 8], [8, 10],
28 | [6, 12], [5, 11], [11, 12],
29 | [12, 14], [14, 16], [11, 13], [13, 15]]
30 |
31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
32 | self.data_dir = os.path.join(opt.data_dir, 'coco')
33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split))
34 | if split == 'test':
35 | self.annot_path = os.path.join(
36 | self.data_dir, 'annotations',
37 | 'image_info_test-dev2017.json').format(split)
38 | else:
39 | self.annot_path = os.path.join(
40 | self.data_dir, 'annotations',
41 | 'person_keypoints_{}2017.json').format(split)
42 | self.max_objs = 32
43 | self._data_rng = np.random.RandomState(123)
44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571],
45 | dtype=np.float32)
46 | self._eig_vec = np.array([
47 | [-0.58752847, -0.69563484, 0.41340352],
48 | [-0.5832747, 0.00994535, -0.81221408],
49 | [-0.56089297, 0.71832671, 0.41158938]
50 | ], dtype=np.float32)
51 | self.split = split
52 | self.opt = opt
53 |
54 | print('==> initializing coco 2017 {} data.'.format(split))
55 | self.coco = coco.COCO(self.annot_path)
56 | image_ids = self.coco.getImgIds()
57 |
58 | if split == 'train':
59 | self.images = []
60 | for img_id in image_ids:
61 | idxs = self.coco.getAnnIds(imgIds=[img_id])
62 | if len(idxs) > 0:
63 | self.images.append(img_id)
64 | else:
65 | self.images = image_ids
66 | self.num_samples = len(self.images)
67 | print('Loaded {} {} samples'.format(split, self.num_samples))
68 |
69 | def _to_float(self, x):
70 | return float("{:.2f}".format(x))
71 |
72 | def convert_eval_format(self, all_bboxes):
73 | # import pdb; pdb.set_trace()
74 | detections = []
75 | for image_id in all_bboxes:
76 | for cls_ind in all_bboxes[image_id]:
77 | category_id = 1
78 | for dets in all_bboxes[image_id][cls_ind]:
79 | bbox = dets[:4]
80 | bbox[2] -= bbox[0]
81 | bbox[3] -= bbox[1]
82 | score = dets[4]
83 | bbox_out = list(map(self._to_float, bbox))
84 | keypoints = np.concatenate([
85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2),
86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist()
87 | keypoints = list(map(self._to_float, keypoints))
88 |
89 | detection = {
90 | "image_id": int(image_id),
91 | "category_id": int(category_id),
92 | "bbox": bbox_out,
93 | "score": float("{:.2f}".format(score)),
94 | "keypoints": keypoints
95 | }
96 | detections.append(detection)
97 | return detections
98 |
99 | def __len__(self):
100 | return self.num_samples
101 |
102 | def save_results(self, results, save_dir):
103 | json.dump(self.convert_eval_format(results),
104 | open('{}/results.json'.format(save_dir), 'w'))
105 |
106 |
107 | def run_eval(self, results, save_dir):
108 | # result_json = os.path.join(opt.save_dir, "results.json")
109 | # detections = convert_eval_format(all_boxes)
110 | # json.dump(detections, open(result_json, "w"))
111 | self.save_results(results, save_dir)
112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir))
113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints")
114 | coco_eval.evaluate()
115 | coco_eval.accumulate()
116 | coco_eval.summarize()
117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox")
118 | coco_eval.evaluate()
119 | coco_eval.accumulate()
120 | coco_eval.summarize()
--------------------------------------------------------------------------------
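
In convert_eval_format above, each detection row stores 34 keypoint coordinates in dets[5:39], while the COCO keypoint result format expects 51 numbers (x, y, visibility for each of 17 joints); a constant visibility of 1 is therefore appended. A shape-only check of that reshaping:

```python
import numpy as np

dets = np.zeros(39, dtype=np.float32)  # [x1, y1, x2, y2, score, 17*(x, y)]
kps_xy = dets[5:39].reshape(-1, 2)     # (17, 2) joint coordinates
keypoints = np.concatenate(
    [kps_xy, np.ones((17, 1), dtype=np.float32)],  # visibility flag = 1
    axis=1).reshape(51)                # COCO layout: [x, y, v] * 17
assert keypoints.shape == (51,)
```
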
/src/lib/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .sample.ctdet import CTDetDataset
6 |
7 | from .dataset.coco import COCO
8 | from .dataset.coco_hp import COCOHP
9 |
10 |
11 | dataset_factory = {
12 | 'coco': COCO,
13 | 'coco_hp': COCOHP
14 | }
15 |
16 | _sample_factory = {
17 | 'ctdet': CTDetDataset,
18 | }
19 |
20 |
21 | def get_dataset(dataset, task):
22 | class Dataset(dataset_factory[dataset], _sample_factory[task]):
23 | pass
24 | return Dataset
25 |
--------------------------------------------------------------------------------
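
get_dataset builds the concrete Dataset class by multiple inheritance: the dataset class (e.g. COCO) contributes the constructor, annotations and evaluation code, while the sample class (CTDetDataset) contributes __getitem__. A usage sketch; opt stands for the options object produced by lib/opts.py, which is not shown in this section:

```python
from datasets.dataset_factory import get_dataset

Dataset = get_dataset('coco', 'ctdet')  # class Dataset(COCO, CTDetDataset)
# MRO is Dataset -> COCO -> CTDetDataset -> torch.utils.data.Dataset, so
# __init__/run_eval resolve to COCO and __getitem__ to CTDetDataset.
# dataset = Dataset(opt, 'train')  # 'opt' comes from lib/opts.py (not shown)
```
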
/src/lib/datasets/sample/ctdet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch.utils.data as data
6 | import numpy as np
7 | import torch
8 | import json
9 | import cv2
10 | import os
11 | from utils.image import flip, color_aug
12 | from utils.image import get_affine_transform, affine_transform
13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
14 | from utils.image import draw_dense_reg
15 | import math
16 | import boto3
17 | import io
18 | from PIL import Image
19 |
20 | host = "http://oss.wuhu-a.brainpp.cn"
21 | s3_client = boto3.client('s3', endpoint_url=host)
22 |
23 |
24 | class CTDetDataset(data.Dataset):
25 | def _coco_box_to_bbox(self, box):
26 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]],
27 | dtype=np.float32)
28 | return bbox
29 |
30 | def _get_border(self, border, size):
31 | i = 1
32 | while size - border // i <= border // i:
33 | i *= 2
34 | return border // i
35 |
36 | def __getitem__(self, index):
37 | img_id = self.images[index]
38 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name']
39 | img_dir = 'coco/images/trainval2017/'
40 | img_path = os.path.join(img_dir, file_name)
41 | ann_ids = self.coco.getAnnIds(imgIds=[img_id])
42 | anns = self.coco.loadAnns(ids=ann_ids)
43 | num_objs = min(len(anns), self.max_objs)
44 |
45 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path)
46 | img = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32)
47 |
48 | height, width = img.shape[0], img.shape[1]
49 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32)
50 | if self.opt.keep_res:
51 | input_h = (height | self.opt.pad) + 1
52 | input_w = (width | self.opt.pad) + 1
53 | s = np.array([input_w, input_h], dtype=np.float32)
54 | else:
55 | s = max(img.shape[0], img.shape[1]) * 1.0
56 | input_h, input_w = self.opt.input_h, self.opt.input_w
57 |
58 | flipped = False
59 | if self.split == 'train':
60 | if not self.opt.not_rand_crop:
61 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1))
62 | w_border = self._get_border(128, img.shape[1])
63 | h_border = self._get_border(128, img.shape[0])
64 | c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border)
65 | c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border)
66 | else:
67 | sf = self.opt.scale
68 | cf = self.opt.shift
69 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
70 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf)
71 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf)
72 |
73 | if np.random.random() < self.opt.flip:
74 | flipped = True
75 | # print(img.shape)
76 | img = img[:, ::-1, :]
77 | c[0] = width - c[0] - 1
78 |
79 | trans_input = get_affine_transform(
80 | c, s, 0, [input_w, input_h])
81 | inp = cv2.warpAffine(img, trans_input,
82 | (input_w, input_h),
83 | flags=cv2.INTER_LINEAR)
84 | inp = (inp.astype(np.float32) / 255.)
85 | if self.split == 'train' and not self.opt.no_color_aug:
86 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec)
87 | inp = (inp - self.mean) / self.std
88 | inp = inp.transpose(2, 0, 1)
89 |
90 | output_h = input_h // self.opt.down_ratio
91 | output_w = input_w // self.opt.down_ratio
92 | num_obj_classes = self.num_obj_classes
93 | num_act_classes = self.num_act_classes
94 | trans_output = get_affine_transform(c, s, 0, [output_w, output_h])
95 |
96 | hm = np.zeros((num_obj_classes, output_h, output_w), dtype=np.float32)
97 | hm_act = np.zeros((num_act_classes, output_h, output_w), dtype=np.float32)
98 | wh = np.zeros((self.max_objs, 2), dtype=np.float32)
99 | wh_act = np.zeros((self.max_objs, 2), dtype=np.float32)
100 | dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32)
101 | reg = np.zeros((self.max_objs, 2), dtype=np.float32)
102 | ind = np.zeros((self.max_objs), dtype=np.int64)
103 | ind_act = np.zeros((self.max_objs), dtype=np.int64)
104 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8)
105 | reg_act_mask = np.zeros((self.max_objs), dtype=np.uint8)
106 | cat_spec_wh = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.float32)
107 | cat_spec_mask = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.uint8)
108 |
109 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \
110 | draw_umich_gaussian
111 |
112 | gt_det = []
113 |
114 | p = 0  # counter over human-object pairs; indexes rows of wh_act/ind_act/reg_act_mask
115 |
116 | for k in range(num_objs):
117 | ann = anns[k]
118 | bbox = self._coco_box_to_bbox(ann['bbox'])
119 | cls_id = int(self.cat_ids[ann['category_id']])
120 |
121 | if flipped:
122 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1
123 | bbox[:2] = affine_transform(bbox[:2], trans_output)
124 | bbox[2:] = affine_transform(bbox[2:], trans_output)
125 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
126 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
127 |
128 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
129 | if h > 0 and w > 0:
130 | radius = gaussian_radius((math.ceil(h), math.ceil(w)))
131 | radius = max(0, int(radius))
132 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius
133 | ct = np.array(
134 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
135 | ct_int = ct.astype(np.int32)
136 | draw_gaussian(hm[cls_id], ct_int, radius)
137 | wh[k] = 1. * w, 1. * h
138 | ind[k] = ct_int[1] * output_w + ct_int[0]
139 | reg[k] = ct - ct_int
140 | reg_mask[k] = 1
141 | cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k]
142 | cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1
143 | if self.opt.dense_wh:
144 | draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius)
145 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2,
146 | ct[0] + w / 2, ct[1] + h / 2, 1, cls_id])
147 |
148 | if ann['category_id'] == 1:
149 | if len(ann['bbox']) != 4:
150 | for cls_id in ann['bbox'][4:]:
151 | draw_gaussian(hm_act[cls_id], ct_int, radius)
152 |
153 | # h_act = h, w_act = w
154 | # wh_act[p] = 1. * w, 1. * h
155 | # ind_act[p] = ct_int[1] * output_w + ct_int[0]
156 | # reg_act_mask[p] = 1
157 | # p += 1
158 |
159 | if ann['obj_bbox'] != []:
160 | for i, obbox in enumerate(ann['obj_bbox']):
161 | o_bbox = self._coco_box_to_bbox(obbox[:4])
162 | o_act = obbox[4:]
163 | o_bbox = np.array(o_bbox)
164 | if flipped:
165 | o_bbox[[0, 2]] = width - o_bbox[[2, 0]] - 1
166 | o_bbox[:2] = affine_transform(o_bbox[:2], trans_output)
167 | o_bbox[2:] = affine_transform(o_bbox[2:], trans_output)
168 | o_bbox[[0, 2]] = np.clip(o_bbox[[0, 2]], 0, output_w - 1)
169 | o_bbox[[1, 3]] = np.clip(o_bbox[[1, 3]], 0, output_h - 1)
170 |
171 | o_h, o_w = o_bbox[3] - o_bbox[1], o_bbox[2] - o_bbox[0]
172 |
173 | if o_h > 0 and o_w > 0:
174 | # radius = gaussian_radius((math.ceil(o_h), math.ceil(o_w)))
175 | # radius = max(0, int(radius))
176 | radius = 10
177 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius
178 |
179 | o_ct = np.array(
180 | [(o_bbox[0] + o_bbox[2]) / 2, (o_bbox[1] + o_bbox[3]) / 2], dtype=np.float32)
181 | act_ct = (ct + o_ct) / 2
182 | act_ct_int = act_ct.astype(np.int32)
183 |
184 | h_act, w_act = abs(ct[1] - o_ct[1]), abs(ct[0] - o_ct[0])
185 | wh_act[p] = 1. * w_act, 1. * h_act
186 | ind_act[p] = act_ct_int[1] * output_w + act_ct_int[0]
187 | reg_act_mask[p] = 1
188 | p += 1
189 |
190 | for cls_id in o_act:
191 | draw_gaussian(hm_act[cls_id], act_ct_int, radius)
192 |
193 | ret = {'input': inp, 'hm_act': hm_act, 'wh_act': wh_act, 'ind_act': ind_act, 'reg_act_mask': reg_act_mask}  # only interaction-point targets; no 'hm'/'wh' object keys, so the dense_wh/cat_spec_wh branches below expect those options to be off
194 |
195 | if self.opt.dense_wh:
196 | hm_a = hm.max(axis=0, keepdims=True)
197 | dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0)
198 | ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask})
199 | del ret['wh']
200 | elif self.opt.cat_spec_wh:
201 | ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask})
202 | del ret['wh']
203 | if self.opt.reg_offset:
204 | ret.update({'reg': reg})
205 | if self.opt.debug > 0 or not self.split == 'train':
206 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \
207 | np.zeros((1, 6), dtype=np.float32)
208 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id}
209 | ret['meta'] = meta
210 | return ret
211 |
--------------------------------------------------------------------------------
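
The ground truth built in __getitem__ above places each interaction point at the midpoint of the human and object centers and stores the absolute center offsets in wh_act; ind_act flattens the point into a row-major index over the output map. A tiny worked example with made-up coordinates:

```python
import numpy as np

ct = np.array([40., 30.], dtype=np.float32)    # human center (output coords)
o_ct = np.array([60., 50.], dtype=np.float32)  # object center

act_ct = (ct + o_ct) / 2                       # [50., 40.] interaction point
w_act = abs(ct[0] - o_ct[0])                   # 20.0
h_act = abs(ct[1] - o_ct[1])                   # 20.0
act_ct_int = act_ct.astype(np.int32)
output_w = 128
ind_act = act_ct_int[1] * output_w + act_ct_int[0]  # 40 * 128 + 50 = 5170
```
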
/src/lib/detectors/base_detector.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 | from models.model import create_model, load_model
12 | from utils.image import get_affine_transform
13 | from utils.debugger import Debugger
14 |
15 | import boto3
16 | import io
17 | from PIL import Image
18 |
19 | host = "http://oss.wuhu-a.brainpp.cn"
20 | s3_client = boto3.client('s3', endpoint_url=host)
21 |
22 |
23 | class BaseDetector(object):
24 | def __init__(self, opt):
25 | if opt.gpus[0] >= 0:
26 | opt.device = torch.device('cuda')
27 | else:
28 | opt.device = torch.device('cpu')
29 |
30 | print('Creating model...')
31 | self.model = create_model(opt.arch, opt.heads, opt.head_conv)
32 | self.model = load_model(self.model, opt.load_model)
33 | self.model = self.model.to(opt.device)
34 | self.model.eval()
35 |
36 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3)
37 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3)
38 | self.max_per_image = 100
39 | self.num_obj_classes = opt.num_obj_classes
40 | self.num_act_classes = opt.num_act_classes
41 | self.scales = opt.test_scales
42 | self.opt = opt
43 | self.pause = True
44 |
45 | def pre_process(self, image, scale, meta=None):
46 | height, width = image.shape[0:2]
47 | new_height = int(height * scale)
48 | new_width = int(width * scale)
49 | if self.opt.fix_res:
50 | inp_height, inp_width = self.opt.input_h, self.opt.input_w
51 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
52 | s = max(height, width) * 1.0
53 | else:
54 | inp_height = (new_height | self.opt.pad) + 1
55 | inp_width = (new_width | self.opt.pad) + 1
56 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32)
57 | s = np.array([inp_width, inp_height], dtype=np.float32)
58 |
59 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
60 | resized_image = cv2.resize(image, (new_width, new_height))
61 | inp_image = cv2.warpAffine(
62 | resized_image, trans_input, (inp_width, inp_height),
63 | flags=cv2.INTER_LINEAR)
64 | inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32)
65 |
66 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width)
67 | if self.opt.flip_test:
68 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
69 | images = torch.from_numpy(images)
70 | meta = {'c': c, 's': s,
71 | 'out_height': inp_height // self.opt.down_ratio,
72 | 'out_width': inp_width // self.opt.down_ratio}
73 | return images, meta
74 |
75 | def process(self, images, return_time=False):
76 | raise NotImplementedError
77 |
78 | def post_process(self, dets_act, meta, scale=1):
79 | raise NotImplementedError
80 |
81 | def merge_outputs(self, detections):
82 | raise NotImplementedError
83 |
84 | def debug(self, debugger, images, dets, output, scale=1):
85 | raise NotImplementedError
86 |
87 | def show_results(self, debugger, image, results):
88 | raise NotImplementedError
89 |
90 | def run(self, img_path, meta=None):
91 | pre_processed = False
92 | # if isinstance(image_or_path_or_tensor, np.ndarray):
93 | # image = image_or_path_or_tensor
94 | # elif type(image_or_path_or_tensor) == type (''):
95 | # image = cv2.imread(image_or_path_or_tensor)
96 | # else:
97 | # image = image_or_path_or_tensor['image'][0].numpy()
98 | # pre_processed_images = image_or_path_or_tensor
99 | # pre_processed = True
100 |
101 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path)
102 | image = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32)
103 |
104 | results = []
105 | for scale in self.scales:
106 | scale_start_time = time.time()
107 | if not pre_processed:
108 | images, meta = self.pre_process(image, scale, meta)
109 | else:
110 | # import pdb; pdb.set_trace()
111 | images = pre_processed_images['images'][scale][0]
112 | meta = pre_processed_images['meta'][scale]
113 | meta = {k: v.numpy()[0] for k, v in meta.items()}
114 | images = images.to(self.opt.device)
115 |
116 | # print(images.shape)
117 |
118 | output, dets_act, forward_time = self.process(images, return_time=True)
119 |
120 | dets_act = self.post_process(dets_act, meta, scale)
121 |
122 | results.append(dets_act)
123 |
124 | return results
125 |
--------------------------------------------------------------------------------
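
When fix_res is off, pre_process above pads the scaled image size with the bit trick (x | pad) + 1 so the network's strided downsampling divides it evenly. A quick check, assuming pad = 127 (the usual hourglass setting in CenterNet-style opts; resnet variants typically use 31):

```python
pad = 127  # assumption: the hourglass value set by opts.py

def padded(x):
    # x | pad fills the low 7 bits, so +1 rounds up to a multiple of 128
    # (and bumps an already-aligned x by a full 128).
    return (x | pad) + 1

print(padded(500))  # 512
print(padded(512))  # 640 -- already-aligned sizes still gain one stride
```
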
/src/lib/detectors/ctdet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import cv2
6 | import numpy as np
7 | from progress.bar import Bar
8 | import time
9 | import torch
10 |
11 | # from external.nms import soft_nms  # build src/lib/external first (make), then uncomment; merge_outputs below calls soft_nms for multi-scale or --nms testing
12 | from models.decode import ctdet_decode
13 | from models.utils import flip_tensor
14 | from utils.image import get_affine_transform
15 | from utils.post_process import ctdet_post_process
16 | from utils.debugger import Debugger
17 |
18 | from .base_detector import BaseDetector
19 |
20 |
21 | class CtdetDetector(BaseDetector):
22 | def __init__(self, opt):
23 | super(CtdetDetector, self).__init__(opt)
24 |
25 | def process(self, images, return_time=False):
26 | with torch.no_grad():
27 | output = self.model(images)[-1]
28 | hm_act = output['hm_act_f'].sigmoid_()
29 | reg_act = None
30 | wh_act = output['wh_act']
31 | if self.opt.flip_test:
32 | hm_act = (hm_act[0:1] + flip_tensor(hm_act[1:2])) / 2
33 | wh_act = (wh_act[0:1] + flip_tensor(wh_act[1:2])) / 2
34 | torch.cuda.synchronize()
35 | forward_time = time.time()
36 | dets_act = ctdet_decode(hm_act, wh_act, reg_act=reg_act, K=self.opt.K)
37 |
38 | if return_time:
39 | return output, dets_act, forward_time
40 | else:
41 | return output, dets_act
42 |
43 |
44 | def post_process(self, dets_act, meta, scale=1):
45 | dets_act = dets_act.detach().cpu().numpy()
46 |
47 | dets_act = dets_act.reshape(1, -1, dets_act.shape[2])
48 |
49 | dets_act = ctdet_post_process(
50 | dets_act.copy(), [meta['c']], [meta['s']],
51 | meta['out_height'], meta['out_width'], self.opt.num_obj_classes, self.opt.num_act_classes)
52 | # print(dets_act)
53 |
54 | # for j in range(1, self.num_obj_classes + 1):
55 | # dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
56 | # dets[0][j][:, :4] /= scale
57 | for j in range(1, self.num_act_classes + 1):
58 | dets_act[0][j] = np.array(dets_act[0][j], dtype=np.float32).reshape(-1, 7)
59 | dets_act[0][j][:, :6] /= scale
60 |
61 | # print(dets_act[0])
62 | return dets_act[0]
63 |
64 | def merge_outputs(self, detections):
65 | results = {}
66 | for j in range(1, self.num_obj_classes + 1):
67 | results[j] = np.concatenate(
68 | [detection[j] for detection in detections], axis=0).astype(np.float32)
69 | if len(self.scales) > 1 or self.opt.nms:
70 | soft_nms(results[j], Nt=0.5, method=2)
71 | scores = np.hstack(
72 | [results[j][:, 4] for j in range(1, self.num_obj_classes + 1)])
73 | if len(scores) > self.max_per_image:
74 | kth = len(scores) - self.max_per_image
75 | thresh = np.partition(scores, kth)[kth]
76 | for j in range(1, self.num_obj_classes + 1):
77 | keep_inds = (results[j][:, 4] >= thresh)
78 | results[j] = results[j][keep_inds]
79 | return results
80 |
81 | def debug(self, debugger, images, dets, output, scale=1):
82 | detection = dets.detach().cpu().numpy().copy()
83 | detection[:, :, :4] *= self.opt.down_ratio
84 | for i in range(1):
85 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0)
86 | img = ((img * self.std + self.mean) * 255).astype(np.uint8)
87 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy())
88 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale))
89 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale))
90 | for k in range(len(dets[i])):
91 | if detection[i, k, 4] > self.opt.center_thresh:
92 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1],
93 | detection[i, k, 4],
94 | img_id='out_pred_{:.1f}'.format(scale))
95 |
96 | def show_results(self, debugger, image, results):
97 | debugger.add_img(image, img_id='ctdet')
98 | for j in range(1, self.num_obj_classes + 1):
99 | for bbox in results[j]:
100 | if bbox[4] > self.opt.vis_thresh:
101 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet')
102 | debugger.show_all_imgs(pause=self.pause)
103 |
--------------------------------------------------------------------------------
/src/lib/detectors/detector_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | from .ctdet import CtdetDetector
6 |
7 | detector_factory = {
8 | 'ctdet': CtdetDetector
9 | }
10 |
--------------------------------------------------------------------------------
/src/lib/external/.gitignore:
--------------------------------------------------------------------------------
1 | bbox.c
2 | bbox.cpython-35m-x86_64-linux-gnu.so
3 | bbox.cpython-36m-x86_64-linux-gnu.so
4 |
5 | nms.c
6 | nms.cpython-35m-x86_64-linux-gnu.so
7 | nms.cpython-36m-x86_64-linux-gnu.so
8 |
--------------------------------------------------------------------------------
/src/lib/external/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 |
--------------------------------------------------------------------------------
/src/lib/external/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/external/__init__.py
--------------------------------------------------------------------------------
/src/lib/external/setup.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | from distutils.core import setup
3 | from distutils.extension import Extension
4 | from Cython.Build import cythonize
5 |
6 | extensions = [
7 | Extension(
8 | "nms",
9 | ["nms.pyx"],
10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"]
11 | )
12 | ]
13 |
14 | setup(
15 | name="coco",
16 | ext_modules=cythonize(extensions),
17 | include_dirs=[numpy.get_include()]
18 | )
19 |
--------------------------------------------------------------------------------
/src/lib/logger.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
6 | import os
7 | import time
8 | import sys
9 | import torch
10 | USE_TENSORBOARD = True
11 | try:
12 | import tensorboardX
13 | print('Using tensorboardX')
14 | except ImportError:
15 | USE_TENSORBOARD = False
16 |
17 | class Logger(object):
18 | def __init__(self, opt):
19 | """Create a summary writer logging to log_dir."""
20 | if not os.path.exists(opt.save_dir):
21 | os.makedirs(opt.save_dir)
22 | if not os.path.exists(opt.debug_dir):
23 | os.makedirs(opt.debug_dir)
24 |
25 | time_str = time.strftime('%Y-%m-%d-%H-%M')
26 |
27 | args = dict((name, getattr(opt, name)) for name in dir(opt)
28 | if not name.startswith('_'))
29 | file_name = os.path.join(opt.save_dir, 'opt.txt')
30 | with open(file_name, 'wt') as opt_file:
31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__))
32 | opt_file.write('==> cudnn version: {}\n'.format(
33 | torch.backends.cudnn.version()))
34 | opt_file.write('==> Cmd:\n')
35 | opt_file.write(str(sys.argv))
36 | opt_file.write('\n==> Opt:\n')
37 | for k, v in sorted(args.items()):
38 | opt_file.write(' %s: %s\n' % (str(k), str(v)))
39 |
40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str)
41 | if USE_TENSORBOARD:
42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir)
43 | else:
44 | if not os.path.exists(os.path.dirname(log_dir)):
45 | os.mkdir(os.path.dirname(log_dir))
46 | if not os.path.exists(log_dir):
47 | os.mkdir(log_dir)
48 | self.log = open(log_dir + '/log.txt', 'w')
49 | try:
50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir))
51 | except:
52 | pass
53 | self.start_line = True
54 |
55 | def write(self, txt):
56 | if self.start_line:
57 | time_str = time.strftime('%Y-%m-%d-%H-%M')
58 | self.log.write('{}: {}'.format(time_str, txt))
59 | else:
60 | self.log.write(txt)
61 | self.start_line = False
62 | if '\n' in txt:
63 | self.start_line = True
64 | self.log.flush()
65 |
66 | def close(self):
67 | self.log.close()
68 |
69 | def scalar_summary(self, tag, value, step):
70 | """Log a scalar variable."""
71 | if USE_TENSORBOARD:
72 | self.writer.add_scalar(tag, value, step)
73 |
--------------------------------------------------------------------------------
/src/lib/models/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 |
8 | from .scatter_gather import scatter_kwargs
9 |
10 | class _DataParallel(Module):
11 | r"""Implements data parallelism at the module level.
12 |
13 | This container parallelizes the application of the given module by
14 | splitting the input across the specified devices by chunking in the batch
15 | dimension. In the forward pass, the module is replicated on each device,
16 | and each replica handles a portion of the input. During the backwards
17 | pass, gradients from each replica are summed into the original module.
18 |
19 | The batch size should be larger than the number of GPUs used. It should
20 | also be an integer multiple of the number of GPUs so that each chunk is the
21 | same size (so that each GPU processes the same number of samples).
22 |
23 | See also: :ref:`cuda-nn-dataparallel-instead`
24 |
25 | Arbitrary positional and keyword inputs are allowed to be passed into
26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim
27 | specified (default 0). Primitive types will be broadcasted, but all
28 | other types will be a shallow copy and can be corrupted if written to in
29 | the model's forward pass.
30 |
31 | Args:
32 | module: module to be parallelized
33 | device_ids: CUDA devices (default: all devices)
34 | output_device: device location of output (default: device_ids[0])
35 |
36 | Example::
37 |
38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
39 | >>> output = net(input_var)
40 | """
41 |
42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well
43 |
44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
45 | super(_DataParallel, self).__init__()
46 |
47 | if not torch.cuda.is_available():
48 | self.module = module
49 | self.device_ids = []
50 | return
51 |
52 | if device_ids is None:
53 | device_ids = list(range(torch.cuda.device_count()))
54 | if output_device is None:
55 | output_device = device_ids[0]
56 | self.dim = dim
57 | self.module = module
58 | self.device_ids = device_ids
59 | self.chunk_sizes = chunk_sizes
60 | self.output_device = output_device
61 | if len(self.device_ids) == 1:
62 | self.module.cuda(device_ids[0])
63 |
64 | def forward(self, *inputs, **kwargs):
65 | if not self.device_ids:
66 | return self.module(*inputs, **kwargs)
67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
68 | if len(self.device_ids) == 1:
69 | return self.module(*inputs[0], **kwargs[0])
70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
71 | outputs = self.parallel_apply(replicas, inputs, kwargs)
72 | return self.gather(outputs, self.output_device)
73 |
74 | def replicate(self, module, device_ids):
75 | return replicate(module, device_ids)
76 |
77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
79 |
80 | def parallel_apply(self, replicas, inputs, kwargs):
81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
82 |
83 | def gather(self, outputs, output_device):
84 | return gather(outputs, output_device, dim=self.dim)
85 |
86 |
87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
89 |
90 | This is the functional version of the DataParallel module.
91 |
92 | Args:
93 | module: the module to evaluate in parallel
94 | inputs: inputs to the module
95 | device_ids: GPU ids on which to replicate module
96 | output_device: GPU location of the output Use -1 to indicate the CPU.
97 | (default: device_ids[0])
98 | Returns:
99 | a Variable containing the result of module(input) located on
100 | output_device
101 | """
102 | if not isinstance(inputs, tuple):
103 | inputs = (inputs,)
104 |
105 | if device_ids is None:
106 | device_ids = list(range(torch.cuda.device_count()))
107 |
108 | if output_device is None:
109 | output_device = device_ids[0]
110 |
111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
112 | if len(device_ids) == 1:
113 | return module(*inputs[0], **module_kwargs[0])
114 | used_device_ids = device_ids[:len(inputs)]
115 | replicas = replicate(module, used_device_ids)
116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
117 | return gather(outputs, output_device, dim)
118 |
119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
120 | if chunk_sizes is None:
121 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
122 | standard_size = True
123 | for i in range(1, len(chunk_sizes)):
124 | if chunk_sizes[i] != chunk_sizes[0]:
125 | standard_size = False
126 | if standard_size:
127 | return torch.nn.DataParallel(module, device_ids, output_device, dim)
128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
--------------------------------------------------------------------------------
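
The DataParallel factory above falls back to torch.nn.DataParallel when every chunk is the same size; uneven chunk_sizes route through _DataParallel so that, for example, GPU 0 can carry a smaller share (this is what --master_batch controls in the training script). A usage sketch with a hypothetical stand-in module; the numbers mirror experiments/ctdet_coco_hg.sh (batch 24, master batch 4, five GPUs):

```python
import torch.nn as nn
from models.data_parallel import DataParallel

model = nn.Conv2d(3, 16, 3)  # hypothetical stand-in for the real network
# master batch 4 on GPU 0, remaining 20 samples split 5 per GPU:
wrapped = DataParallel(model, device_ids=[0, 1, 2, 3, 4],
                       chunk_sizes=[4, 5, 5, 5, 5])
```
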
/src/lib/models/decode.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import numpy as np
8 | from .utils import _gather_feat, _tranpose_and_gather_feat
9 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian
10 |
11 |
12 | def _nms(heat, kernel=3):
13 | pad = (kernel - 1) // 2
14 |
15 | hmax = nn.functional.max_pool2d(
16 | heat, (kernel, kernel), stride=1, padding=pad)
17 | keep = (hmax == heat).float()
18 | return heat * keep
19 |
20 |
21 | def _left_aggregate(heat):
22 | '''
23 | heat: batchsize x channels x h x w
24 | '''
25 | shape = heat.shape
26 | heat = heat.reshape(-1, heat.shape[3])
27 | heat = heat.transpose(1, 0).contiguous()
28 | ret = heat.clone()
29 | for i in range(1, heat.shape[0]):
30 | inds = (heat[i] >= heat[i - 1])
31 | ret[i] += ret[i - 1] * inds.float()
32 | return (ret - heat).transpose(1, 0).reshape(shape)
33 |
34 |
35 | def _right_aggregate(heat):
36 | '''
37 | heat: batchsize x channels x h x w
38 | '''
39 | shape = heat.shape
40 | heat = heat.reshape(-1, heat.shape[3])
41 | heat = heat.transpose(1, 0).contiguous()
42 | ret = heat.clone()
43 | for i in range(heat.shape[0] - 2, -1, -1):
44 | inds = (heat[i] >= heat[i + 1])
45 | ret[i] += ret[i + 1] * inds.float()
46 | return (ret - heat).transpose(1, 0).reshape(shape)
47 |
48 |
49 | def _top_aggregate(heat):
50 | '''
51 | heat: batchsize x channels x h x w
52 | '''
53 | heat = heat.transpose(3, 2)
54 | shape = heat.shape
55 | heat = heat.reshape(-1, heat.shape[3])
56 | heat = heat.transpose(1, 0).contiguous()
57 | ret = heat.clone()
58 | for i in range(1, heat.shape[0]):
59 | inds = (heat[i] >= heat[i - 1])
60 | ret[i] += ret[i - 1] * inds.float()
61 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
62 |
63 |
64 | def _bottom_aggregate(heat):
65 | '''
66 | heat: batchsize x channels x h x w
67 | '''
68 | heat = heat.transpose(3, 2)
69 | shape = heat.shape
70 | heat = heat.reshape(-1, heat.shape[3])
71 | heat = heat.transpose(1, 0).contiguous()
72 | ret = heat.clone()
73 | for i in range(heat.shape[0] - 2, -1, -1):
74 | inds = (heat[i] >= heat[i + 1])
75 | ret[i] += ret[i + 1] * inds.float()
76 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
77 |
78 |
79 | def _h_aggregate(heat, aggr_weight=0.1):
80 | return aggr_weight * _left_aggregate(heat) + \
81 | aggr_weight * _right_aggregate(heat) + heat
82 |
83 |
84 | def _v_aggregate(heat, aggr_weight=0.1):
85 | return aggr_weight * _top_aggregate(heat) + \
86 | aggr_weight * _bottom_aggregate(heat) + heat
87 | '''
88 | # Slow for large number of categories
89 | def _topk(scores, K=40):
90 | batch, cat, height, width = scores.size()
91 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K)
92 |
93 | topk_clses = (topk_inds / (height * width)).int()
94 |
95 | topk_inds = topk_inds % (height * width)
96 | topk_ys = (topk_inds / width).int().float()
97 | topk_xs = (topk_inds % width).int().float()
98 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs
99 | '''
100 |
101 |
102 | def _topk_channel(scores, K=40):
103 | batch, cat, height, width = scores.size()
104 |
105 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
106 |
107 | topk_inds = topk_inds % (height * width)
108 | topk_ys = (topk_inds / width).int().float()
109 | topk_xs = (topk_inds % width).int().float()
110 |
111 | return topk_scores, topk_inds, topk_ys, topk_xs
112 |
113 |
114 | def _topk(scores, K=40):
115 | batch, cat, height, width = scores.size()
116 |
117 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
118 |
119 | topk_inds = topk_inds % (height * width)
120 | topk_ys = (topk_inds / width).int().float()
121 | topk_xs = (topk_inds % width).int().float()
122 |
123 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
124 | topk_clses = (topk_ind / K).int()
125 | topk_inds = _gather_feat(
126 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
127 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
128 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
129 |
130 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
131 |
132 |
133 | def ctdet_decode(hm_act, wh_act, reg_act=None, K=100):
134 | batch, cat, height, width = hm_act.size()
135 |
136 | hm_act = _nms(hm_act, kernel=3) # need to adjust the kernel size.
137 |
138 | scores_act, inds_act, clses_act, ys_act, xs_act = _topk(hm_act, K=K)
139 |
140 | if reg_act is not None:
141 | reg_act = _tranpose_and_gather_feat(reg_act, inds_act)
142 | reg_act = reg_act.view(batch, K, 2)
143 | xs_act = xs_act.view(batch, K, 1) + reg_act[:, :, 0:1]
144 | ys_act = ys_act.view(batch, K, 1) + reg_act[:, :, 1:2]
145 | else:
146 | xs_act = xs_act.view(batch, K, 1) + 0.5
147 | ys_act = ys_act.view(batch, K, 1) + 0.5
148 |
149 | wh_act = _tranpose_and_gather_feat(wh_act, inds_act)
150 |
151 | wh_act = wh_act.view(batch, K, 2)
152 |
153 | clses_act = clses_act.view(batch, K, 1).float()
154 | scores_act = scores_act.view(batch, K, 1)
155 |
156 | bboxes = torch.cat([xs_act - wh_act[..., 0:1] / 2,
157 | ys_act - wh_act[..., 1:2] / 2,
158 | xs_act + wh_act[..., 0:1] / 2,
159 | ys_act + wh_act[..., 1:2] / 2], dim=2)
160 |
161 | detections_act = torch.cat([xs_act, ys_act, bboxes, scores_act, clses_act], dim=2)
162 |
163 | # print(detections_act.shape)
164 | # print(detections_act)
165 |
166 | return detections_act
167 |
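168 | # ---------------------------------------------------------------------------
169 | # Illustrative usage sketch (not part of the original file): decoding raw
170 | # head outputs with ctdet_decode. The head layout below (80-class heatmap,
171 | # 2-channel wh and reg heads) is an assumption chosen for illustration.
172 | # Run as a module (e.g. `python -m models.decode` from src/lib) so any
173 | # relative imports in this file resolve.
174 | if __name__ == '__main__':
175 |     batch, num_classes, h, w = 2, 80, 128, 128
176 |     hm = torch.rand(batch, num_classes, h, w)   # center-point heatmap scores
177 |     wh = torch.rand(batch, 2, h, w)             # width / height head
178 |     reg = torch.rand(batch, 2, h, w)            # sub-pixel offset head
179 |     dets = ctdet_decode(hm, wh, reg_act=reg, K=100)
180 |     print(dets.shape)  # (batch, K, 8): center x/y, box x1/y1/x2/y2, score, class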
--------------------------------------------------------------------------------
/src/lib/models/losses.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Portions of this code are from
3 | # CornerNet (https://github.com/princeton-vl/CornerNet)
4 | # Copyright (c) 2018, University of Michigan
5 | # Licensed under the BSD 3-Clause License
6 | # ------------------------------------------------------------------------------
7 | from __future__ import absolute_import
8 | from __future__ import division
9 | from __future__ import print_function
10 |
11 | import torch
12 | import torch.nn as nn
13 | from .utils import _tranpose_and_gather_feat
14 | import torch.nn.functional as F
15 |
16 |
17 | def _slow_neg_loss(pred, gt):
18 | '''focal loss from CornerNet'''
19 | pos_inds = gt.eq(1)
20 | neg_inds = gt.lt(1)
21 |
22 | neg_weights = torch.pow(1 - gt[neg_inds], 4)
23 |
24 | loss = 0
25 | pos_pred = pred[pos_inds]
26 | neg_pred = pred[neg_inds]
27 |
28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
30 |
31 | num_pos = pos_inds.float().sum()
32 | pos_loss = pos_loss.sum()
33 | neg_loss = neg_loss.sum()
34 |
35 | if pos_pred.nelement() == 0:
36 | loss = loss - neg_loss
37 | else:
38 | loss = loss - (pos_loss + neg_loss) / num_pos
39 | return loss
40 |
41 |
42 | def _neg_loss(pred, gt):
43 | ''' Modified focal loss. Exactly the same as the CornerNet version;
44 | runs faster but uses a little more memory.
45 | Arguments:
46 | pred (batch x c x h x w)
47 | gt (batch x c x h x w)
48 | '''
49 | pos_inds = gt.eq(1).float()
50 | neg_inds = gt.lt(1).float()
51 |
52 | neg_weights = torch.pow(1 - gt, 4)
53 |
54 | loss = 0
55 |
56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
58 |
59 | num_pos = pos_inds.float().sum()
60 | pos_loss = pos_loss.sum()
61 | neg_loss = neg_loss.sum()
62 |
63 | if num_pos == 0:
64 | loss = loss - neg_loss
65 | else:
66 | loss = loss - (pos_loss + neg_loss) / num_pos
67 | return loss
68 |
69 | def _not_faster_neg_loss(pred, gt):
70 | pos_inds = gt.eq(1).float()
71 | neg_inds = gt.lt(1).float()
72 | num_pos = pos_inds.float().sum()
73 | neg_weights = torch.pow(1 - gt, 4)
74 |
75 | loss = 0
76 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds
77 | weight = neg_weights * neg_inds + pos_inds
78 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
79 | all_loss = all_loss.sum()
80 |
81 | if num_pos > 0:
82 | all_loss /= num_pos
83 | loss -= all_loss
84 | return loss
85 |
86 | def _slow_reg_loss(regr, gt_regr, mask):
87 | num = mask.float().sum()
88 | mask = mask.unsqueeze(2).expand_as(gt_regr)
89 |
90 | regr = regr[mask]
91 | gt_regr = gt_regr[mask]
92 |
93 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
94 | regr_loss = regr_loss / (num + 1e-4)
95 | return regr_loss
96 |
97 | def _reg_loss(regr, gt_regr, mask):
98 | ''' Smooth L1 regression loss
99 | Arguments:
100 | regr (batch x max_objects x dim)
101 | gt_regr (batch x max_objects x dim)
102 | mask (batch x max_objects)
103 | '''
104 | num = mask.float().sum()
105 | mask = mask.unsqueeze(2).expand_as(gt_regr).float()
106 |
107 | regr = regr * mask
108 | gt_regr = gt_regr * mask
109 |
110 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
111 | regr_loss = regr_loss / (num + 1e-4)
112 | return regr_loss
113 |
114 | class FocalLoss(nn.Module):
115 | '''nn.Module wrapper for focal loss'''
116 | def __init__(self):
117 | super(FocalLoss, self).__init__()
118 | self.neg_loss = _neg_loss
119 |
120 | def forward(self, out, target):
121 | return self.neg_loss(out, target)
122 |
123 | class RegLoss(nn.Module):
124 | '''Regression loss for an output tensor
125 | Arguments:
126 | output (batch x dim x h x w)
127 | mask (batch x max_objects)
128 | ind (batch x max_objects)
129 | target (batch x max_objects x dim)
130 | '''
131 | def __init__(self):
132 | super(RegLoss, self).__init__()
133 |
134 | def forward(self, output, mask, ind, target):
135 | pred = _tranpose_and_gather_feat(output, ind)
136 | loss = _reg_loss(pred, target, mask)
137 | return loss
138 |
139 | class RegL1Loss(nn.Module):
140 | def __init__(self):
141 | super(RegL1Loss, self).__init__()
142 |
143 | def forward(self, output, mask, ind, target):
144 | pred = _tranpose_and_gather_feat(output, ind)
145 | mask = mask.unsqueeze(2).expand_as(pred).float()
146 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
147 | loss = F.l1_loss(pred * mask, target * mask, size_average=False)
148 | loss = loss / (mask.sum() + 1e-4)
149 | return loss
150 |
151 | class NormRegL1Loss(nn.Module):
152 | def __init__(self):
153 | super(NormRegL1Loss, self).__init__()
154 |
155 | def forward(self, output, mask, ind, target):
156 | pred = _tranpose_and_gather_feat(output, ind)
157 | mask = mask.unsqueeze(2).expand_as(pred).float()
158 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
159 | pred = pred / (target + 1e-4)
160 | target = target * 0 + 1
161 | loss = F.l1_loss(pred * mask, target * mask, size_average=False)
162 | loss = loss / (mask.sum() + 1e-4)
163 | return loss
164 |
165 | class RegWeightedL1Loss(nn.Module):
166 | def __init__(self):
167 | super(RegWeightedL1Loss, self).__init__()
168 |
169 | def forward(self, output, mask, ind, target):
170 | pred = _tranpose_and_gather_feat(output, ind)
171 | mask = mask.float()
172 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
173 | loss = F.l1_loss(pred * mask, target * mask, size_average=False)
174 | loss = loss / (mask.sum() + 1e-4)
175 | return loss
176 |
177 | class L1Loss(nn.Module):
178 | def __init__(self):
179 | super(L1Loss, self).__init__()
180 |
181 | def forward(self, output, mask, ind, target):
182 | pred = _tranpose_and_gather_feat(output, ind)
183 | mask = mask.unsqueeze(2).expand_as(pred).float()
184 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
185 | return loss
186 |
187 | class BinRotLoss(nn.Module):
188 | def __init__(self):
189 | super(BinRotLoss, self).__init__()
190 |
191 | def forward(self, output, mask, ind, rotbin, rotres):
192 | pred = _tranpose_and_gather_feat(output, ind)
193 | loss = compute_rot_loss(pred, rotbin, rotres, mask)
194 | return loss
195 |
196 | def compute_res_loss(output, target):
197 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean')
198 |
199 | # TODO: weight
200 | def compute_bin_loss(output, target, mask):
201 | mask = mask.expand_as(output)
202 | output = output * mask.float()
203 | return F.cross_entropy(output, target, reduction='elementwise_mean')
204 |
205 | def compute_rot_loss(output, target_bin, target_res, mask):
206 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
207 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
208 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
209 | # target_res: (B, 128, 2) [bin1_res, bin2_res]
210 | # mask: (B, 128, 1)
211 | # import pdb; pdb.set_trace()
212 | output = output.view(-1, 8)
213 | target_bin = target_bin.view(-1, 2)
214 | target_res = target_res.view(-1, 2)
215 | mask = mask.view(-1, 1)
216 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
217 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
218 | loss_res = torch.zeros_like(loss_bin1)
219 | if target_bin[:, 0].nonzero().shape[0] > 0:
220 | idx1 = target_bin[:, 0].nonzero()[:, 0]
221 | valid_output1 = torch.index_select(output, 0, idx1.long())
222 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
223 | loss_sin1 = compute_res_loss(
224 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
225 | loss_cos1 = compute_res_loss(
226 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
227 | loss_res += loss_sin1 + loss_cos1
228 | if target_bin[:, 1].nonzero().shape[0] > 0:
229 | idx2 = target_bin[:, 1].nonzero()[:, 0]
230 | valid_output2 = torch.index_select(output, 0, idx2.long())
231 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
232 | loss_sin2 = compute_res_loss(
233 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
234 | loss_cos2 = compute_res_loss(
235 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
236 | loss_res += loss_sin2 + loss_cos2
237 | return loss_bin1 + loss_bin2 + loss_res
238 |
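239 | # ---------------------------------------------------------------------------
240 | # Illustrative usage sketch (not part of the original file): combining
241 | # FocalLoss (center heatmap) with RegL1Loss (size regression), roughly the
242 | # way a CenterNet-style trainer would. All shapes below are assumptions.
243 | # Run as a module (e.g. `python -m models.losses` from src/lib) so the
244 | # relative import above resolves.
245 | if __name__ == '__main__':
246 |     batch, c, h, w, max_objs = 2, 80, 128, 128, 32
247 |     hm_pred = torch.rand(batch, c, h, w).clamp(1e-4, 1 - 1e-4)
248 |     hm_gt = torch.zeros(batch, c, h, w)
249 |     hm_gt[:, 0, 64, 64] = 1.0                   # one positive center per image
250 |     wh_pred = torch.rand(batch, 2, h, w)
251 |     ind = torch.zeros(batch, max_objs).long()
252 |     ind[:, 0] = 64 * w + 64                     # flattened index of that center
253 |     mask = torch.zeros(batch, max_objs)
254 |     mask[:, 0] = 1
255 |     wh_gt = torch.rand(batch, max_objs, 2)
256 |     print(FocalLoss()(hm_pred, hm_gt).item(),
257 |           RegL1Loss()(wh_pred, mask, ind, wh_gt).item())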
--------------------------------------------------------------------------------
/src/lib/models/model.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torchvision.models as models
6 | import torch
7 | import torch.nn as nn
8 | import os
9 |
10 | # from .networks.msra_resnet import get_pose_net
11 | # from .networks.dlav0 import get_pose_net as get_dlav0
12 | # from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
13 | # from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
14 | from .networks.large_hourglass import get_large_hourglass_net
15 |
16 | _model_factory = {
17 | # 'res': get_pose_net, # default Resnet with deconv
18 | # 'dlav0': get_dlav0, # default DLAup
19 | # 'dla': get_dla_dcn,
20 | # 'resdcn': get_pose_net_dcn,
21 | 'hourglass': get_large_hourglass_net,
22 | }
23 |
24 |
25 | def create_model(arch, heads, head_conv):
26 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
27 | arch = arch[:arch.find('_')] if '_' in arch else arch
28 | get_model = _model_factory[arch]
29 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv)
30 | return model
31 |
32 |
33 | # def load_model(model, model_path, optimizer=None, resume=False,
34 | # lr=None, lr_step=None):
35 | # start_epoch = 0
36 | # checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
37 | # print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
38 | # state_dict_ = checkpoint['state_dict']
39 | # state_dict = {}
40 | #
41 | # # convert a DataParallel state dict to a plain model state dict
42 | # for k in state_dict_:
43 | # if k.startswith('module') and not k.startswith('module_list'):
44 | # state_dict[k[7:]] = state_dict_[k]
45 | # else:
46 | # state_dict[k] = state_dict_[k]
47 | # model_state_dict = model.state_dict()
48 | #
49 | # # check loaded parameters and created model parameters
50 | # for k in state_dict:
51 | # if k in model_state_dict:
52 | # if state_dict[k].shape != model_state_dict[k].shape:
53 | # print('Skip loading parameter {}, required shape {}, '\
54 | # 'loaded shape {}.'.format(
55 | # k, model_state_dict[k].shape, state_dict[k].shape))
56 | # state_dict[k] = model_state_dict[k]
57 | # else:
58 | # print('Drop parameter {}.'.format(k))
59 | # for k in model_state_dict:
60 | # if not (k in state_dict):
61 | # print('No param {}.'.format(k))
62 | # state_dict[k] = model_state_dict[k]
63 | # model.load_state_dict(state_dict, strict=False)
64 | #
65 | # # resume optimizer parameters
66 | # if optimizer is not None and resume:
67 | # if 'optimizer' in checkpoint:
68 | # optimizer.load_state_dict(checkpoint['optimizer'])
69 | # start_epoch = checkpoint['epoch']
70 | # start_lr = lr
71 | # for step in lr_step:
72 | # if start_epoch >= step:
73 | # start_lr *= 0.1
74 | # for param_group in optimizer.param_groups:
75 | # param_group['lr'] = start_lr
76 | # print('Resumed optimizer with start lr', start_lr)
77 | # else:
78 | # print('No optimizer parameters in checkpoint.')
79 | # if optimizer is not None:
80 | # return model, optimizer, start_epoch
81 | # else:
82 | # return model
83 |
84 | def load_model(model, model_path):
85 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
86 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
87 | state_dict_ = checkpoint['state_dict']
88 | state_dict = {}
89 |
90 | # convert a DataParallel state dict to a plain model state dict
91 | for k in state_dict_:
92 | if k.startswith('module') and not k.startswith('module_list'):
93 | state_dict[k[7:]] = state_dict_[k]
94 | else:
95 | state_dict[k] = state_dict_[k]
96 | model_state_dict = model.state_dict()
97 |
98 | # check loaded parameters and created model parameters
99 | for k in state_dict:
100 | if k in model_state_dict:
101 | if state_dict[k].shape != model_state_dict[k].shape:
102 | print('Skip loading parameter {}, required shape {}, '\
103 | 'loaded shape {}.'.format(
104 | k, model_state_dict[k].shape, state_dict[k].shape))
105 | state_dict[k] = model_state_dict[k]
106 | else:
107 | print('Drop parameter {}.'.format(k))
108 | for k in model_state_dict:
109 | if not (k in state_dict):
110 | print('No param {}.'.format(k))
111 | state_dict[k] = model_state_dict[k]
112 | model.load_state_dict(state_dict, strict=False)
113 |
114 | return model
115 |
116 |
117 | def save_model(path, epoch, model, optimizer=None):
118 | if isinstance(model, torch.nn.DataParallel):
119 | state_dict = model.module.state_dict()
120 | else:
121 | state_dict = model.state_dict()
122 | data = {'epoch': epoch,
123 | 'state_dict': state_dict}
124 | if not (optimizer is None):
125 | data['optimizer'] = optimizer.state_dict()
126 | torch.save(data, path)
127 |
128 |
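129 | # ---------------------------------------------------------------------------
130 | # Illustrative usage sketch (not part of the original file): a minimal
131 | # create/save/load round trip. The head dict is an assumption chosen for
132 | # illustration (CenterNet-style 'hm', 'wh' and 'reg' heads). Run as a
133 | # module so the relative import above resolves.
134 | if __name__ == '__main__':
135 |     heads = {'hm': 80, 'wh': 2, 'reg': 2}
136 |     model = create_model('hourglass', heads, head_conv=64)
137 |     save_model('model_last.pth', epoch=0, model=model)
138 |     model = load_model(model, 'model_last.pth')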
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | .idea
3 | *.so
4 | *.o
5 | *pyc
6 | _ext
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2019, Charles Shang
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | 3. Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/README.md:
--------------------------------------------------------------------------------
1 | ## Deformable Convolutional Networks V2 with PyTorch
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | import torch; from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | import torch; from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 |
42 | ### Known Issues:
43 |
44 | - [x] Gradient check w.r.t offset (solved)
45 | - [ ] Backward is not reentrant (minor)
46 |
47 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
48 |
49 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
50 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it due to some
51 | non-differentiable points?
52 |
53 | Update: all gradient checks pass with double precision.
54 |
55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
56 | float, `<1e-15` for double),
57 | so it may not be a serious problem.
58 |
59 | Please post an issue or PR if you have any comments.
60 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/models/networks/DCNv2/__init__.py
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 | # from torch.utils.cpp_extension import BuildExtension
5 |
6 |
7 | sources = ['src/dcn_v2.c']
8 | headers = ['src/dcn_v2.h']
9 | defines = []
10 | with_cuda = False
11 |
12 | extra_objects = []
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/dcn_v2_cuda.c']
16 | headers += ['src/dcn_v2_cuda.h']
17 | defines += [('WITH_CUDA', None)]
18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o']
19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o']
20 | with_cuda = True
21 | else:
22 | raise ValueError('CUDA is not available')
23 |
24 | extra_compile_args = ['-fopenmp', '-std=c99']
25 |
26 | this_file = os.path.dirname(os.path.realpath(__file__))
27 | print(this_file)
28 | sources = [os.path.join(this_file, fname) for fname in sources]
29 | headers = [os.path.join(this_file, fname) for fname in headers]
30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
31 |
32 |
33 | # ffi = BuildExtension(
34 | ffi = create_extension(
35 | '_ext.dcn_v2',
36 | headers=headers,
37 | sources=sources,
38 | define_macros=defines,
39 | relative_to=__file__,
40 | with_cuda=with_cuda,
41 | extra_objects=extra_objects,
42 | extra_compile_args=extra_compile_args
43 | )
44 |
45 | if __name__ == '__main__':
46 | ffi.build()
47 |
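48 | # Note (added for context): torch.utils.ffi was removed in PyTorch 1.0, so this
49 | # script only builds on old PyTorch (<= 0.4); on newer versions the extension
50 | # would have to be ported to torch.utils.cpp_extension (see the import above).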
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/build_double.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 | # from torch.utils.cpp_extension import BuildExtension
5 |
6 |
7 | sources = ['src/dcn_v2_double.c']
8 | headers = ['src/dcn_v2_double.h']
9 | defines = []
10 | with_cuda = False
11 |
12 | extra_objects = []
13 | if torch.cuda.is_available():
14 | print('Including CUDA code.')
15 | sources += ['src/dcn_v2_cuda_double.c']
16 | headers += ['src/dcn_v2_cuda_double.h']
17 | defines += [('WITH_CUDA', None)]
18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o']
19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o']
20 | with_cuda = True
21 | else:
22 | raise ValueError('CUDA is not available')
23 |
24 | extra_compile_args = ['-fopenmp', '-std=c99']
25 |
26 | this_file = os.path.dirname(os.path.realpath(__file__))
27 | print(this_file)
28 | sources = [os.path.join(this_file, fname) for fname in sources]
29 | headers = [os.path.join(this_file, fname) for fname in headers]
30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
31 |
32 | # ffi = BuildExtension(
33 | ffi = create_extension(
34 | '_ext.dcn_v2_double',
35 | headers=headers,
36 | sources=sources,
37 | define_macros=defines,
38 | relative_to=__file__,
39 | with_cuda=with_cuda,
40 | extra_objects=extra_objects,
41 | extra_compile_args=extra_compile_args
42 | )
43 |
44 | if __name__ == '__main__':
45 | ffi.build()
46 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/dcn_v2.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import torch
7 | import math
8 | from torch import nn
9 | from torch.nn.modules.utils import _pair
10 |
11 | from .dcn_v2_func import DCNv2Function
12 | from .dcn_v2_func import DCNv2PoolingFunction
13 |
14 | class DCNv2(nn.Module):
15 |
16 | def __init__(self, in_channels, out_channels,
17 | kernel_size, stride, padding, dilation=1, deformable_groups=1):
18 | super(DCNv2, self).__init__()
19 | self.in_channels = in_channels
20 | self.out_channels = out_channels
21 | self.kernel_size = _pair(kernel_size)
22 | self.stride = stride
23 | self.padding = padding
24 | self.dilation = dilation
25 | self.deformable_groups = deformable_groups
26 |
27 | self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size))
28 | self.bias = nn.Parameter(torch.Tensor(out_channels))
29 | self.reset_parameters()
30 |
31 | def reset_parameters(self):
32 | n = self.in_channels
33 | for k in self.kernel_size:
34 | n *= k
35 | stdv = 1. / math.sqrt(n)
36 | self.weight.data.uniform_(-stdv, stdv)
37 | self.bias.data.zero_()
38 |
39 | def forward(self, input, offset, mask):
40 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups)
41 | return func(input, offset, mask, self.weight, self.bias)
42 |
43 |
44 | class DCN(DCNv2):
45 |
46 | def __init__(self, in_channels, out_channels,
47 | kernel_size, stride, padding,
48 | dilation=1, deformable_groups=1):
49 | super(DCN, self).__init__(in_channels, out_channels,
50 | kernel_size, stride, padding, dilation, deformable_groups)
51 |
52 | self.conv_offset_mask = nn.Conv2d(self.in_channels,
53 | self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
54 | kernel_size=self.kernel_size,
55 | stride=(self.stride, self.stride),
56 | padding=(self.padding, self.padding),
57 | bias=True)
58 | self.init_offset()
59 |
60 | def init_offset(self):
61 | self.conv_offset_mask.weight.data.zero_()
62 | self.conv_offset_mask.bias.data.zero_()
63 |
64 | def forward(self, input):
65 | out = self.conv_offset_mask(input)
66 | o1, o2, mask = torch.chunk(out, 3, dim=1)
67 | offset = torch.cat((o1, o2), dim=1)
68 | mask = torch.sigmoid(mask)
69 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups)
70 | return func(input, offset, mask, self.weight, self.bias)
71 |
72 |
73 | class DCNv2Pooling(nn.Module):
74 |
75 | def __init__(self,
76 | spatial_scale,
77 | pooled_size,
78 | output_dim,
79 | no_trans,
80 | group_size=1,
81 | part_size=None,
82 | sample_per_part=4,
83 | trans_std=.0):
84 | super(DCNv2Pooling, self).__init__()
85 | self.spatial_scale = spatial_scale
86 | self.pooled_size = pooled_size
87 | self.output_dim = output_dim
88 | self.no_trans = no_trans
89 | self.group_size = group_size
90 | self.part_size = pooled_size if part_size is None else part_size
91 | self.sample_per_part = sample_per_part
92 | self.trans_std = trans_std
93 | self.func = DCNv2PoolingFunction(self.spatial_scale,
94 | self.pooled_size,
95 | self.output_dim,
96 | self.no_trans,
97 | self.group_size,
98 | self.part_size,
99 | self.sample_per_part,
100 | self.trans_std)
101 |
102 | def forward(self, data, rois, offset):
103 |
104 | if self.no_trans:
105 | offset = data.new()
106 | return self.func(data, rois, offset)
107 |
108 | class DCNPooling(DCNv2Pooling):
109 |
110 | def __init__(self,
111 | spatial_scale,
112 | pooled_size,
113 | output_dim,
114 | no_trans,
115 | group_size=1,
116 | part_size=None,
117 | sample_per_part=4,
118 | trans_std=.0,
119 | deform_fc_dim=1024):
120 | super(DCNPooling, self).__init__(spatial_scale,
121 | pooled_size,
122 | output_dim,
123 | no_trans,
124 | group_size,
125 | part_size,
126 | sample_per_part,
127 | trans_std)
128 |
129 | self.deform_fc_dim = deform_fc_dim
130 |
131 | if not no_trans:
132 | self.func_offset = DCNv2PoolingFunction(self.spatial_scale,
133 | self.pooled_size,
134 | self.output_dim,
135 | True,
136 | self.group_size,
137 | self.part_size,
138 | self.sample_per_part,
139 | self.trans_std)
140 | self.offset_fc = nn.Sequential(
141 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim),
142 | nn.ReLU(inplace=True),
143 | nn.Linear(self.deform_fc_dim, self.deform_fc_dim),
144 | nn.ReLU(inplace=True),
145 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 2)
146 | )
147 | self.offset_fc[4].weight.data.zero_()
148 | self.offset_fc[4].bias.data.zero_()
149 | self.mask_fc = nn.Sequential(
150 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim),
151 | nn.ReLU(inplace=True),
152 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 1),
153 | nn.Sigmoid()
154 | )
155 | self.mask_fc[2].weight.data.zero_()
156 | self.mask_fc[2].bias.data.zero_()
157 |
158 | def forward(self, data, rois):
159 | if self.no_trans:
160 | offset = data.new()
161 | else:
162 | n = rois.shape[0]
163 | offset = data.new()
164 | x = self.func_offset(data, rois, offset)
165 | offset = self.offset_fc(x.view(n, -1))
166 | offset = offset.view(n, 2, self.pooled_size, self.pooled_size)
167 | mask = self.mask_fc(x.view(n, -1))
168 | mask = mask.view(n, 1, self.pooled_size, self.pooled_size)
169 | feat = self.func(data, rois, offset) * mask
170 | return feat
171 | return self.func(data, rois, offset)
172 |
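173 | # ---------------------------------------------------------------------------
174 | # Illustrative usage sketch (not part of the original file): the low-level
175 | # DCNv2 module with an explicitly supplied offset and mask (DCN above
176 | # predicts them internally via conv_offset_mask). Per deformable group the
177 | # offset carries 2*kh*kw channels and the mask kh*kw channels. Requires the
178 | # compiled `_ext` backend and a CUDA device.
179 | if __name__ == '__main__':
180 |     dg, k = 2, 3
181 |     dcn = DCNv2(64, 64, kernel_size=(k, k), stride=1, padding=1,
182 |                 deformable_groups=dg).cuda()
183 |     x = torch.randn(2, 64, 128, 128).cuda()
184 |     offset = torch.randn(2, dg * 2 * k * k, 128, 128).cuda()
185 |     mask = torch.sigmoid(torch.randn(2, dg * k * k, 128, 128)).cuda()
186 |     print(dcn(x, offset, mask).shape)  # torch.Size([2, 64, 128, 128])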
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/dcn_v2_func.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | from __future__ import absolute_import
3 | from __future__ import print_function
4 | from __future__ import division
5 |
6 | import torch
7 | from torch.autograd import Function
8 |
9 | from ._ext import dcn_v2 as _backend
10 | # from _ext import dcn_v2_double as _backend
11 |
12 |
13 | class DCNv2Function(Function):
14 |
15 | def __init__(self, stride, padding, dilation=1, deformable_groups=1):
16 | super(DCNv2Function, self).__init__()
17 | self.stride = stride
18 | self.padding = padding
19 | self.dilation = dilation
20 | self.deformable_groups = deformable_groups
21 |
22 | def forward(self, input, offset, mask, weight, bias):
23 | if not input.is_cuda:
24 | raise NotImplementedError
25 | if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad:
26 | self.save_for_backward(input, offset, mask, weight, bias)
27 | output = input.new(*self._infer_shape(input, weight))
28 | self._bufs = [input.new(), input.new()]
29 | _backend.dcn_v2_cuda_forward(input, weight,
30 | bias, self._bufs[0],
31 | offset, mask,
32 | output, self._bufs[1],
33 | weight.shape[2], weight.shape[3],
34 | self.stride, self.stride,
35 | self.padding, self.padding,
36 | self.dilation, self.dilation,
37 | self.deformable_groups)
38 | return output
39 |
40 | def backward(self, grad_output):
41 | if not grad_output.is_cuda:
42 | raise NotImplementedError
43 | input, offset, mask, weight, bias = self.saved_tensors
44 | grad_input = input.new(*input.size()).zero_()
45 | grad_offset = offset.new(*offset.size()).zero_()
46 | grad_mask = mask.new(*mask.size()).zero_()
47 | grad_weight = weight.new(*weight.size()).zero_()
48 | grad_bias = bias.new(*bias.size()).zero_()
49 | _backend.dcn_v2_cuda_backward(input, weight,
50 | bias, self._bufs[0],
51 | offset, mask,
52 | self._bufs[1],
53 | grad_input, grad_weight,
54 | grad_bias, grad_offset,
55 | grad_mask, grad_output,
56 | weight.shape[2], weight.shape[3],
57 | self.stride, self.stride,
58 | self.padding, self.padding,
59 | self.dilation, self.dilation,
60 | self.deformable_groups)
61 |
62 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias
63 |
64 | def _infer_shape(self, input, weight):
65 | n = input.size(0)
66 | channels_out = weight.size(0)
67 | height, width = input.shape[2:4]
68 | kernel_h, kernel_w = weight.shape[2:4]
69 | height_out = (height + 2 * self.padding -
70 | (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1
71 | width_out = (width + 2 * self.padding - (self.dilation *
72 | (kernel_w - 1) + 1)) // self.stride + 1
73 | return (n, channels_out, height_out, width_out)
74 |
75 |
76 | class DCNv2PoolingFunction(Function):
77 |
78 | def __init__(self,
79 | spatial_scale,
80 | pooled_size,
81 | output_dim,
82 | no_trans,
83 | group_size=1,
84 | part_size=None,
85 | sample_per_part=4,
86 | trans_std=.0):
87 | super(DCNv2PoolingFunction, self).__init__()
88 | self.spatial_scale = spatial_scale
89 | self.pooled_size = pooled_size
90 | self.output_dim = output_dim
91 | self.no_trans = no_trans
92 | self.group_size = group_size
93 | self.part_size = pooled_size if part_size is None else part_size
94 | self.sample_per_part = sample_per_part
95 | self.trans_std = trans_std
96 |
97 | assert self.trans_std >= 0.0 and self.trans_std <= 1.0
98 |
99 | def forward(self, data, rois, offset):
100 | if not data.is_cuda:
101 | raise NotImplementedError
102 |
103 | output = data.new(*self._infer_shape(data, rois))
104 | output_count = data.new(*self._infer_shape(data, rois))
105 | _backend.dcn_v2_psroi_pooling_cuda_forward(data, rois, offset,
106 | output, output_count,
107 | self.no_trans, self.spatial_scale,
108 | self.output_dim, self.group_size,
109 | self.pooled_size, self.part_size,
110 | self.sample_per_part, self.trans_std)
111 |
112 | if data.requires_grad or rois.requires_grad or offset.requires_grad:
113 | self.save_for_backward(data, rois, offset, output_count)
114 |
115 | return output
116 |
117 | def backward(self, grad_output):
118 | if not grad_output.is_cuda:
119 | raise NotImplementedError
120 |
121 | data, rois, offset, output_count = self.saved_tensors
122 | grad_input = data.new(*data.size()).zero_()
123 | grad_offset = offset.new(*offset.size()).zero_()
124 |
125 | _backend.dcn_v2_psroi_pooling_cuda_backward(grad_output,
126 | data,
127 | rois,
128 | offset,
129 | output_count,
130 | grad_input,
131 | grad_offset,
132 | self.no_trans,
133 | self.spatial_scale,
134 | self.output_dim,
135 | self.group_size,
136 | self.pooled_size,
137 | self.part_size,
138 | self.sample_per_part,
139 | self.trans_std)
140 | return grad_input, None, grad_offset
141 |
142 | def _infer_shape(self, data, rois):
143 | # _, c, h, w = data.shape[:4]
144 | c = data.shape[1]
145 | n = rois.shape[0]
146 | return (n, self.output_dim, self.pooled_size, self.pooled_size)
147 |
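148 | # Note (added for context): DCNv2Function._infer_shape above applies the
149 | # standard convolution output-size formula
150 | #     out = (in + 2 * padding - (dilation * (kernel - 1) + 1)) // stride + 1
151 | # e.g. a 128x128 input with kernel 3, padding 1, dilation 1, stride 1 gives
152 | # (128 + 2 - 3) // 1 + 1 = 128, i.e. a same-size output map.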
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd src/cuda
3 |
4 | # compile dcn
5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC
6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC
7 |
8 | # compile dcn-roi-pooling
9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC
10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC
11 |
12 | cd -
13 | python build.py
14 | python build_double.py
15 |
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2018 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file modulated_deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1811.11168
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
59 | */
60 |
61 | /***************** Adapted by Charles Shang *********************/
62 |
63 | #ifndef DCN_V2_IM2COL_CUDA
64 | #define DCN_V2_IM2COL_CUDA
65 |
66 | #ifdef __cplusplus
67 | extern "C"
68 | {
69 | #endif
70 |
71 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
72 | const float *data_im, const float *data_offset, const float *data_mask,
73 | const int batch_size, const int channels, const int height_im, const int width_im,
74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
76 | const int dilation_h, const int dilation_w,
77 | const int deformable_group, float *data_col);
78 |
79 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
80 | const float *data_col, const float *data_offset, const float *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, float *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask,
89 | const int batch_size, const int channels, const int height_im, const int width_im,
90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
92 | const int dilation_h, const int dilation_w,
93 | const int deformable_group,
94 | float *grad_offset, float *grad_mask);
95 |
96 | #ifdef __cplusplus
97 | }
98 | #endif
99 |
100 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h:
--------------------------------------------------------------------------------
1 | /*!
2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer ****************
3 | *
4 | * COPYRIGHT
5 | *
6 | * All contributions by the University of California:
7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents)
8 | * All rights reserved.
9 | *
10 | * All other contributions:
11 | * Copyright (c) 2014-2017, the respective contributors
12 | * All rights reserved.
13 | *
14 | * Caffe uses a shared copyright model: each contributor holds copyright over
15 | * their contributions to Caffe. The project versioning records all such
16 | * contribution and copyright details. If a contributor wants to further mark
17 | * their specific copyright on a particular contribution, they should indicate
18 | * their copyright solely in the commit message of the change when it is
19 | * committed.
20 | *
21 | * LICENSE
22 | *
23 | * Redistribution and use in source and binary forms, with or without
24 | * modification, are permitted provided that the following conditions are met:
25 | *
26 | * 1. Redistributions of source code must retain the above copyright notice, this
27 | * list of conditions and the following disclaimer.
28 | * 2. Redistributions in binary form must reproduce the above copyright notice,
29 | * this list of conditions and the following disclaimer in the documentation
30 | * and/or other materials provided with the distribution.
31 | *
32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 | *
43 | * CONTRIBUTION AGREEMENT
44 | *
45 | * By contributing to the BVLC/caffe repository through pull-request, comment,
46 | * or otherwise, the contributor releases their content to the
47 | * license and copyright terms herein.
48 | *
49 | ***************** END Caffe Copyright Notice and Disclaimer ********************
50 | *
51 | * Copyright (c) 2018 Microsoft
52 | * Licensed under The MIT License [see LICENSE for details]
53 | * \file modulated_deformable_im2col.h
54 | * \brief Function definitions of converting an image to
55 | * column matrix based on kernel, padding, dilation, and offset.
56 | * These functions are mainly used in deformable convolution operators.
57 | * \ref: https://arxiv.org/abs/1811.11168
58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu
59 | */
60 |
61 | /***************** Adapted by Charles Shang *********************/
62 |
63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE
64 | #define DCN_V2_IM2COL_CUDA_DOUBLE
65 |
66 | #ifdef __cplusplus
67 | extern "C"
68 | {
69 | #endif
70 |
71 | void modulated_deformable_im2col_cuda(cudaStream_t stream,
72 | const double *data_im, const double *data_offset, const double *data_mask,
73 | const int batch_size, const int channels, const int height_im, const int width_im,
74 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
76 | const int dilation_h, const int dilation_w,
77 | const int deformable_group, double *data_col);
78 |
79 | void modulated_deformable_col2im_cuda(cudaStream_t stream,
80 | const double *data_col, const double *data_offset, const double *data_mask,
81 | const int batch_size, const int channels, const int height_im, const int width_im,
82 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
84 | const int dilation_h, const int dilation_w,
85 | const int deformable_group, double *grad_im);
86 |
87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream,
88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask,
89 | const int batch_size, const int channels, const int height_im, const int width_im,
90 | const int height_col, const int width_col, const int kernel_h, const int kernel_w,
91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w,
92 | const int dilation_h, const int dilation_w,
93 | const int deformable_group,
94 | double *grad_offset, double *grad_mask);
95 |
96 | #ifdef __cplusplus
97 | }
98 | #endif
99 |
100 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 Microsoft
3 | * Licensed under The MIT License [see LICENSE for details]
4 | * \file deformable_psroi_pooling.cu
5 | * \brief
6 | * \author Yi Li, Guodong Zhang, Jifeng Dai
7 | */
8 | /***************** Adapted by Charles Shang *********************/
9 |
10 | #ifndef DCN_V2_PSROI_POOLING_CUDA
11 | #define DCN_V2_PSROI_POOLING_CUDA
12 |
13 | #ifdef __cplusplus
14 | extern "C"
15 | {
16 | #endif
17 |
18 | void DeformablePSROIPoolForward(cudaStream_t stream,
19 | const float *data,
20 | const float *bbox,
21 | const float *trans,
22 | float *out,
23 | float *top_count,
24 | const int batch,
25 | const int channels,
26 | const int height,
27 | const int width,
28 | const int num_bbox,
29 | const int channels_trans,
30 | const int no_trans,
31 | const float spatial_scale,
32 | const int output_dim,
33 | const int group_size,
34 | const int pooled_size,
35 | const int part_size,
36 | const int sample_per_part,
37 | const float trans_std);
38 |
39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
40 | const float *out_grad,
41 | const float *data,
42 | const float *bbox,
43 | const float *trans,
44 | const float *top_count,
45 | float *in_grad,
46 | float *trans_grad,
47 | const int batch,
48 | const int channels,
49 | const int height,
50 | const int width,
51 | const int num_bbox,
52 | const int channels_trans,
53 | const int no_trans,
54 | const float spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const float trans_std);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 |
66 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h:
--------------------------------------------------------------------------------
1 | /*!
2 | * Copyright (c) 2017 Microsoft
3 | * Licensed under The MIT License [see LICENSE for details]
4 | * \file deformable_psroi_pooling.cu
5 | * \brief
6 | * \author Yi Li, Guodong Zhang, Jifeng Dai
7 | */
8 | /***************** Adapted by Charles Shang *********************/
9 |
10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE
11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE
12 |
13 | #ifdef __cplusplus
14 | extern "C"
15 | {
16 | #endif
17 |
18 | void DeformablePSROIPoolForward(cudaStream_t stream,
19 | const double *data,
20 | const double *bbox,
21 | const double *trans,
22 | double *out,
23 | double *top_count,
24 | const int batch,
25 | const int channels,
26 | const int height,
27 | const int width,
28 | const int num_bbox,
29 | const int channels_trans,
30 | const int no_trans,
31 | const double spatial_scale,
32 | const int output_dim,
33 | const int group_size,
34 | const int pooled_size,
35 | const int part_size,
36 | const int sample_per_part,
37 | const double trans_std);
38 |
39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream,
40 | const double *out_grad,
41 | const double *data,
42 | const double *bbox,
43 | const double *trans,
44 | const double *top_count,
45 | double *in_grad,
46 | double *trans_grad,
47 | const int batch,
48 | const int channels,
49 | const int height,
50 | const int width,
51 | const int num_bbox,
52 | const int channels_trans,
53 | const int no_trans,
54 | const double spatial_scale,
55 | const int output_dim,
56 | const int group_size,
57 | const int pooled_size,
58 | const int part_size,
59 | const int sample_per_part,
60 | const double trans_std);
61 |
62 | #ifdef __cplusplus
63 | }
64 | #endif
65 |
66 | #endif
--------------------------------------------------------------------------------
/src/lib/models/networks/DCNv2/src/dcn_v2.c:
--------------------------------------------------------------------------------
1 | #include