├── LICENSE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments └── ctdet_coco_hg.sh ├── models └── .gitignore └── src ├── _init_paths.py ├── apply_prior.py ├── lib ├── datasets │ ├── dataset │ │ ├── coco.py │ │ └── coco_hp.py │ ├── dataset_factory.py │ └── sample │ │ └── ctdet.py ├── detectors │ ├── base_detector.py │ ├── ctdet.py │ └── detector_factory.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── build_double.py │ │ │ ├── dcn_v2.py │ │ │ ├── dcn_v2_func.py │ │ │ ├── make.sh │ │ │ ├── src │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda_double.cu │ │ │ │ │ └── dcn_v2_psroi_pooling_cuda_double.h │ │ │ │ ├── dcn_v2.c │ │ │ │ ├── dcn_v2.h │ │ │ │ ├── dcn_v2_cuda.c │ │ │ │ ├── dcn_v2_cuda.h │ │ │ │ ├── dcn_v2_cuda_double.c │ │ │ │ ├── dcn_v2_cuda_double.h │ │ │ │ ├── dcn_v2_double.c │ │ │ │ └── dcn_v2_double.h │ │ │ └── test.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ ├── py_utils │ │ │ ├── __init__.py │ │ │ ├── _cpools │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ └── src │ │ │ │ │ ├── bottom_pool.cpp │ │ │ │ │ ├── left_pool.cpp │ │ │ │ │ ├── right_pool.cpp │ │ │ │ │ └── top_pool.cpp │ │ │ ├── data_parallel.py │ │ │ ├── losses.py │ │ │ ├── modules.py │ │ │ ├── scatter_gather.py │ │ │ └── utils.py │ │ └── resnet_dcn.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── ctdet.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── ddd_utils.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── test_HOI.py ├── timer.py ├── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_hourglass_weight.py ├── eval_coco.py ├── eval_coco_hp.py ├── reval.py └── vis_pred.py └── vsrl_eval.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Tiancai Wang 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Human-Object Interaction Detection using Interaction Points 2 | 3 | Created by Tiancai Wang, Tong Yang, Martin Danelljan, Fahad Shahbaz Khan, Xiangyu Zhang, Jian Sun 4 | 5 | Links to our paper: [arXiv](https://arxiv.org/abs/2003.14023) and [CVPR2020](http://openaccess.thecvf.com/content_CVPR_2020/html/Wang_Learning_Human-Object_Interaction_Detection_Using_Interaction_Points_CVPR_2020_paper.html) 6 | 7 | ### Introduction 8 | Understanding interactions between humans and objects is one of the fundamental problems in visual classification and an essential step towards detailed scene understanding. Human-object interaction (HOI) detection strives to localize both the human and the object, as well as to identify the complex interactions between them. Most existing HOI detection approaches are instance-centric, where interactions between all possible human-object pairs are predicted based on appearance features and coarse spatial information. We argue that appearance features alone are insufficient to capture complex human-object interactions. In this paper, we therefore propose a novel fully-convolutional approach that directly detects the interactions between human-object pairs. Our network predicts interaction points, which directly localize and classify the interaction. Paired with the densely predicted interaction vectors, the interactions are associated with human and object detections to obtain final predictions. To the best of our knowledge, we are the first to propose an approach where HOI detection is posed as a keypoint detection and grouping problem. Experiments are performed on two popular benchmarks: V-COCO and HICO-DET. Our approach sets a new state-of-the-art on both datasets. 9 | 10 | ## Installation 11 | - Clone this repository. This repository is mainly based on [CenterNet](https://github.com/xingyizhou/CenterNet) and [iCAN](https://github.com/vt-vl-lab/iCAN). 12 | 13 | ```Shell 14 | IPNet_ROOT=/path/to/clone/IPNet 15 | git clone https://github.com/vaesl/IP-Net $IPNet_ROOT 16 | ``` 17 | - The code was tested on Ubuntu 18.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.0.1. 18 | NVIDIA GPUs are needed for testing. After installing Anaconda, create a new conda environment, activate it and install PyTorch 1.0.1. 19 | 20 | ```Shell 21 | conda create -n IPNet python=3.6 22 | source activate IPNet 23 | conda install pytorch=1.0.1 torchvision -c pytorch 24 | ``` 25 | 26 | - Install the requirements. 27 | ```Shell 28 | pip3 install -r requirements.txt 29 | ``` 30 | - Compile the center pooling layers.
31 | ```Shell 32 | cd IPNet_ROOT/src/lib/models/networks/py_utils/_cpools/ 33 | python setup.py install --user 34 | ``` 35 | 36 | - Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | ## Download 47 | To evaluate the performance reported in the paper, the V-COCO and HICO-DET datasets as well as our trained models need to be downloaded. 48 | 49 | ### V-COCO and HICO-DET Datasets 50 | To download the datasets and set up the evaluation API, please follow [iCAN](https://github.com/vt-vl-lab/iCAN). 51 | 52 | ### Trained Models 53 | Please access [Google Drive](https://drive.google.com/file/d/1stBqpTncUFfl-naKn4NONRmC-89jtdyh/view?usp=sharing) 54 | to obtain our trained models for V-COCO and put the models into the corresponding directory (e.g. '~/weights/V-COCO/'). 55 | Note that we only release models for V-COCO for the time being. 56 | 57 | ## Evaluation 58 | To reproduce the performance reported in the paper, simply run: 59 | 60 | ```Shell 61 | python3 test_HOI.py ctdet --exp_id coco_hg --fix_res --arch hourglass --flip_test --load_model /path/to/model/weights 62 | ``` 63 | 64 | ## Citation 65 | Please cite our paper in your publications if it helps your research: 66 | 67 | @inproceedings{Wang2020IPNet, 68 | title = {Learning Human-Object Interaction Detection using Interaction Points}, 69 | author = {Tiancai Wang and Tong Yang and Martin Danelljan and Fahad Shahbaz Khan and Xiangyu Zhang and Jian Sun}, 70 | booktitle = {CVPR}, 71 | year = {2020} 72 | } 73 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src 3 | # train 4 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4 5 | # test 6 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume 7 | # flip test 8 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 9 | # multi scale test 10 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 11 | cd ..
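# Note (added commentary, a sketch of how the flags above interact): with
# --batch_size 24, --master_batch 4 and --gpus 0,1,2,3,4, the custom
# DataParallel in src/lib/models/data_parallel.py splits each batch unevenly:
# GPU 0 receives 4 samples and the remaining 20 are spread over GPUs 1-4
# (5 each), leaving extra memory on GPU 0, where the outputs are gathered
# and the loss is computed.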
-------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/apply_prior.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow iCAN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Chen Gao 5 | # -------------------------------------------------------- 6 | 7 | def apply_prior(Object, prediction): 8 | 9 | 10 | if Object[4] != 32: # not a snowboard, then the action is impossible to be snowboard 11 | prediction[21] = 0 12 | 13 | if Object[4] != 74: # not a book, then the action is impossible to be read 14 | prediction[24] = 0 15 | 16 | if Object[4] != 33: # not a sports ball, then the action is impossible to be kick 17 | prediction[7] = 0 18 | 19 | if (Object[4] != 41) and (Object[4] != 40) and (Object[4] != 42) and (Object[4] != 46): # not 'wine glass', 'bottle', 'cup', 'bowl', then the action is impossible to be drink 20 | prediction[13] = 0 21 | 22 | if Object[4] != 37: # not a skateboard, then the action is impossible to be skateboard 23 | prediction[26] = 0 24 | 25 | if Object[4] != 38: # not a surfboard, then the action is impossible to be surfboard 26 | prediction[0] = 0 27 | 28 | if Object[4] != 31: # not a ski, then the action is impossible to be ski 29 | prediction[1] = 0 30 | 31 | if Object[4] != 64: # not a laptop, then the action is impossible to be work on computer 32 | prediction[8] = 0 33 | 34 | if (Object[4] != 77) and (Object[4] != 43) and (Object[4] != 44): # not 'scissors', 'fork', 'knife', then the action is impossible to be cut_instr 35 | prediction[2] = 0 36 | 37 | if (Object[4] != 33) and (Object[4] != 30): # not 'sports ball', 'frisbee', then the action is impossible to be throw and catch 38 | prediction[15] = 0 39 | prediction[28] = 0 40 | 41 | if Object[4] != 68: # not a cellphone, then the action is impossible to be talk_on_phone 42 | prediction[6] = 0 43 | 44 | if (Object[4] != 14) and (Object[4] != 61) and (Object[4] != 62) and (Object[4] != 60) and (Object[4] != 58) and (Object[4] != 57): # not 'bench', 'dining table', 'toilet', 'bed', 'couch', 'chair', then the action is impossible to be lay 45 | prediction[12] = 0 46 | 47 | if (Object[4] != 32) and (Object[4] != 31) and (Object[4] != 37) and (Object[4] != 38): # not 'snowboard', 'skis', 'skateboard', 'surfboard', then the action is impossible to be jump 48 | prediction[11] = 0 49 | 50 | if (Object[4] != 47) and (Object[4] != 48) and (Object[4] != 49) and (Object[4] != 50) and (Object[4] != 51) and (Object[4] != 52) and (Object[4] != 53) and (Object[4] != 54) and (Object[4] != 55) and (Object[4] != 56): # not 'banana', 'apple', 'sandwich', 'orange', 'carrot', 'broccoli', 'hot dog', 'pizza', 'cake', 'donut', then the action is impossible to be eat_obj 51 | prediction[9] = 0 52 | 53 | if (Object[4]
!= 43) and (Object[4] != 44) and (Object[4] != 45): # not 'fork', 'knife', 'spoon', then the action is impossible to be eat_instr 54 | prediction[16] = 0 55 | 56 | if (Object[4] != 39) and (Object[4] != 35): # not 'tennis racket', 'baseball bat', then the action is impossible to be hit_instr 57 | prediction[19] = 0 58 | 59 | if (Object[4] != 33): # not 'sports ball', then the action is impossible to be hit_obj 60 | prediction[20] = 0 61 | 62 | 63 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 6) and (Object[4] != 8) and (Object[4] != 9) and (Object[4] != 7) and (Object[4] != 5) and (Object[4] != 3) and (Object[4] != 18) and (Object[4] != 21): # not 'bicycle', 'motorcycle', 'bus', 'truck', 'boat', 'train', 'airplane', 'car', 'horse', 'elephant', then the action is impossible to be ride 64 | prediction[5] = 0 65 | 66 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 18) and (Object[4] != 21) and (Object[4] != 14) and (Object[4] != 57) and (Object[4] != 58) and (Object[4] != 60) and (Object[4] != 62) and (Object[4] != 61) and (Object[4] != 29) and (Object[4] != 27) and (Object[4] != 25): # not 'bicycle', 'motorcycle', 'horse', 'elephant', 'bench', 'chair', 'couch', 'bed', 'toilet', 'dining table', 'suitcase', 'handbag', 'backpack', then the action is impossible to be sit 67 | prediction[10] = 0 68 | 69 | if (Object[4] == 1): 70 | prediction[4] = 0 71 | 72 | return prediction 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class COCO(data.Dataset): 15 | num_obj_classes = 80 16 | num_act_classes = 29 17 | default_resolution = [512, 512] 18 | mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(COCO, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'vcoco') 24 | if split == 'test': 25 | self.annot_path = os.path.join( 26 | self.data_dir, 'annotations', 'instances_vcoco_test2014.json') 27 | else: 28 | self.annot_path = os.path.join( 29 | self.data_dir, 'annotations', 'instances_hoi_action_point_iCAN.json') 30 | self.max_objs = 128 31 | self.class_name = [ 32 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 33 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 34 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 35 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 36 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 37 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 38 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 39 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 40 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 41 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 42 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 43 | 'oven', 'toaster', 'sink', 'refrigerator',
'book', 'clock', 'vase', 44 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 45 | self._valid_ids = [ 46 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 47 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 48 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 49 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 50 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 51 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 52 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 53 | 82, 84, 85, 86, 87, 88, 89, 90] 54 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 55 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \ 56 | for v in range(1, self.num_obj_classes + 1)] 57 | self._data_rng = np.random.RandomState(123) 58 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 59 | dtype=np.float32) 60 | self._eig_vec = np.array([ 61 | [-0.58752847, -0.69563484, 0.41340352], 62 | [-0.5832747, 0.00994535, -0.81221408], 63 | [-0.56089297, 0.71832671, 0.41158938] 64 | ], dtype=np.float32) 65 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 66 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 67 | 68 | self.split = split 69 | self.opt = opt 70 | 71 | print('==> initializing coco 2014 {} data.'.format(split)) 72 | self.coco = coco.COCO(self.annot_path) 73 | self.images = self.coco.getImgIds() 74 | self.num_samples = len(self.images) 75 | 76 | print('Loaded {} {} samples'.format(split, self.num_samples)) 77 | 78 | def _to_float(self, x): 79 | return float("{:.2f}".format(x)) 80 | 81 | def convert_eval_format(self, all_bboxes): 82 | # import pdb; pdb.set_trace() 83 | detections = [] 84 | for image_id in all_bboxes: 85 | for cls_ind in all_bboxes[image_id]: 86 | category_id = self._valid_ids[cls_ind - 1] 87 | for bbox in all_bboxes[image_id][cls_ind]: 88 | bbox[2] -= bbox[0] 89 | bbox[3] -= bbox[1] 90 | score = bbox[4] 91 | bbox_out = list(map(self._to_float, bbox[0:4])) 92 | 93 | detection = { 94 | "image_id": int(image_id), 95 | "category_id": int(category_id), 96 | "bbox": bbox_out, 97 | "score": float("{:.2f}".format(score)) 98 | } 99 | if len(bbox) > 5: 100 | extreme_points = list(map(self._to_float, bbox[5:13])) 101 | detection["extreme_points"] = extreme_points 102 | detections.append(detection) 103 | return detections 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def save_results(self, results, save_dir): 109 | json.dump(self.convert_eval_format(results), 110 | open('{}/results.json'.format(save_dir), 'w')) 111 | 112 | def run_eval(self, results, save_dir): 113 | # result_json = os.path.join(save_dir, "results.json") 114 | # detections = self.convert_eval_format(results) 115 | # json.dump(detections, open(result_json, "w")) 116 | self.save_results(results, save_dir) 117 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 118 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 119 | coco_eval.evaluate() 120 | coco_eval.accumulate() 121 | coco_eval.summarize() 122 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 
1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 18 | dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], 20 | dtype=np.float32).reshape(1, 1, 3) 21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 22 | [11, 12], [13, 14], [15, 16]] 23 | def __init__(self, opt, split): 24 | super(COCOHP, self).__init__() 25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 26 | [4, 6], [3, 5], [5, 6], 27 | [5, 7], [7, 9], [6, 8], [8, 10], 28 | [6, 12], [5, 11], [11, 12], 29 | [12, 14], [14, 16], [11, 13], [13, 15]] 30 | 31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 32 | self.data_dir = os.path.join(opt.data_dir, 'coco') 33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 34 | if split == 'test': 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'image_info_test-dev2017.json').format(split) 38 | else: 39 | self.annot_path = os.path.join( 40 | self.data_dir, 'annotations', 41 | 'person_keypoints_{}2017.json').format(split) 42 | self.max_objs = 32 43 | self._data_rng = np.random.RandomState(123) 44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 45 | dtype=np.float32) 46 | self._eig_vec = np.array([ 47 | [-0.58752847, -0.69563484, 0.41340352], 48 | [-0.5832747, 0.00994535, -0.81221408], 49 | [-0.56089297, 0.71832671, 0.41158938] 50 | ], dtype=np.float32) 51 | self.split = split 52 | self.opt = opt 53 | 54 | print('==> initializing coco 2017 {} data.'.format(split)) 55 | self.coco = coco.COCO(self.annot_path) 56 | image_ids = self.coco.getImgIds() 57 | 58 | if split == 'train': 59 | self.images = [] 60 | for img_id in image_ids: 61 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 62 | if len(idxs) > 0: 63 | self.images.append(img_id) 64 | else: 65 | self.images = image_ids 66 | self.num_samples = len(self.images) 67 | print('Loaded {} {} samples'.format(split, self.num_samples)) 68 | 69 | def _to_float(self, x): 70 | return float("{:.2f}".format(x)) 71 | 72 | def convert_eval_format(self, all_bboxes): 73 | # import pdb; pdb.set_trace() 74 | detections = [] 75 | for image_id in all_bboxes: 76 | for cls_ind in all_bboxes[image_id]: 77 | category_id = 1 78 | for dets in all_bboxes[image_id][cls_ind]: 79 | bbox = dets[:4] 80 | bbox[2] -= bbox[0] 81 | bbox[3] -= bbox[1] 82 | score = dets[4] 83 | bbox_out = list(map(self._to_float, bbox)) 84 | keypoints = np.concatenate([ 85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 87 | keypoints = list(map(self._to_float, keypoints)) 88 | 89 | detection = { 90 | "image_id": int(image_id), 91 | "category_id": int(category_id), 92 | "bbox": bbox_out, 93 | "score": float("{:.2f}".format(score)), 94 | "keypoints": keypoints 95 | } 96 | detections.append(detection) 97 | return detections 98 | 99 | def __len__(self): 100 | return self.num_samples 101 | 102 | def save_results(self, results, save_dir): 103 | json.dump(self.convert_eval_format(results), 104 | open('{}/results.json'.format(save_dir), 'w')) 105 | 106 | 107 | def run_eval(self, results, save_dir): 108 | # result_json = os.path.join(opt.save_dir, "results.json") 109 | # detections = convert_eval_format(all_boxes) 110 | # json.dump(detections, open(result_json, "w")) 111 | self.save_results(results, save_dir) 112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 
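# (added commentary) run_eval makes two passes over the same results file:
# the COCOeval constructed above scores OKS-based keypoint AP, and a second
# COCOeval further below re-scores the same detections for bbox AP.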
114 | coco_eval.evaluate() 115 | coco_eval.accumulate() 116 | coco_eval.summarize() 117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 118 | coco_eval.evaluate() 119 | coco_eval.accumulate() 120 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .sample.ctdet import CTDetDataset 6 | 7 | from .dataset.coco import COCO 8 | from .dataset.coco_hp import COCOHP 9 | 10 | 11 | dataset_factory = { 12 | 'coco': COCO, 13 | 'coco_hp': COCOHP 14 | } 15 | 16 | _sample_factory = { 17 | 'ctdet': CTDetDataset, 18 | } 19 | 20 | 21 | def get_dataset(dataset, task): 22 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 23 | pass 24 | return Dataset 25 | -------------------------------------------------------------------------------- /src/lib/datasets/sample/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | import boto3 17 | import io 18 | from PIL import Image 19 | 20 | host = "http://oss.wuhu-a.brainpp.cn" 21 | s3_client = boto3.client('s3', endpoint_url=host) 22 | 23 | 24 | class CTDetDataset(data.Dataset): 25 | def _coco_box_to_bbox(self, box): 26 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 27 | dtype=np.float32) 28 | return bbox 29 | 30 | def _get_border(self, border, size): 31 | i = 1 32 | while size - border // i <= border // i: 33 | i *= 2 34 | return border // i 35 | 36 | def __getitem__(self, index): 37 | img_id = self.images[index] 38 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 39 | img_dir = 'coco/images/trainval2017/' 40 | img_path = os.path.join(img_dir, file_name) 41 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 42 | anns = self.coco.loadAnns(ids=ann_ids) 43 | num_objs = min(len(anns), self.max_objs) 44 | 45 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path) 46 | img = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32) 47 | 48 | height, width = img.shape[0], img.shape[1] 49 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 50 | if self.opt.keep_res: 51 | input_h = (height | self.opt.pad) + 1 52 | input_w = (width | self.opt.pad) + 1 53 | s = np.array([input_w, input_h], dtype=np.float32) 54 | else: 55 | s = max(img.shape[0], img.shape[1]) * 1.0 56 | input_h, input_w = self.opt.input_h, self.opt.input_w 57 | 58 | flipped = False 59 | if self.split == 'train': 60 | if not self.opt.not_rand_crop: 61 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 62 | w_border = self._get_border(128, img.shape[1]) 63 | h_border = self._get_border(128, img.shape[0]) 64 | c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) 65 | c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) 66 | else: 67 | sf
= self.opt.scale 68 | cf = self.opt.shift 69 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 70 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 71 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 72 | 73 | if np.random.random() < self.opt.flip: 74 | flipped = True 75 | # print(img.shape) 76 | img = img[:, ::-1, :] 77 | c[0] = width - c[0] - 1 78 | 79 | trans_input = get_affine_transform( 80 | c, s, 0, [input_w, input_h]) 81 | inp = cv2.warpAffine(img, trans_input, 82 | (input_w, input_h), 83 | flags=cv2.INTER_LINEAR) 84 | inp = (inp.astype(np.float32) / 255.) 85 | if self.split == 'train' and not self.opt.no_color_aug: 86 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 87 | inp = (inp - self.mean) / self.std 88 | inp = inp.transpose(2, 0, 1) 89 | 90 | output_h = input_h // self.opt.down_ratio 91 | output_w = input_w // self.opt.down_ratio 92 | num_obj_classes = self.num_obj_classes 93 | num_act_classes = self.num_act_classes 94 | trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) 95 | 96 | hm = np.zeros((num_obj_classes, output_h, output_w), dtype=np.float32) 97 | hm_act = np.zeros((num_act_classes, output_h, output_w), dtype=np.float32) 98 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 99 | wh_act = np.zeros((self.max_objs, 2), dtype=np.float32) 100 | dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) 101 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 102 | ind = np.zeros((self.max_objs), dtype=np.int64) 103 | ind_act = np.zeros((self.max_objs), dtype=np.int64) 104 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 105 | reg_act_mask = np.zeros((self.max_objs), dtype=np.uint8) 106 | cat_spec_wh = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.float32) 107 | cat_spec_mask = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.uint8) 108 | 109 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 110 | draw_umich_gaussian 111 | 112 | gt_det = [] 113 | 114 | p = 0 115 | 116 | for k in range(num_objs): 117 | ann = anns[k] 118 | bbox = self._coco_box_to_bbox(ann['bbox']) 119 | cls_id = int(self.cat_ids[ann['category_id']]) 120 | 121 | if flipped: 122 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 123 | bbox[:2] = affine_transform(bbox[:2], trans_output) 124 | bbox[2:] = affine_transform(bbox[2:], trans_output) 125 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) 126 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) 127 | 128 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 129 | if h > 0 and w > 0: 130 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 131 | radius = max(0, int(radius)) 132 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius 133 | ct = np.array( 134 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 135 | ct_int = ct.astype(np.int32) 136 | draw_gaussian(hm[cls_id], ct_int, radius) 137 | wh[k] = 1. * w, 1. * h 138 | ind[k] = ct_int[1] * output_w + ct_int[0] 139 | reg[k] = ct - ct_int 140 | reg_mask[k] = 1 141 | cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k] 142 | cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1 143 | if self.opt.dense_wh: 144 | draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) 145 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 146 | ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) 147 | 148 | if ann['category_id'] == 1: 149 | if len(ann['bbox']) != 4: 150 | for cls_id in ann['bbox'][4:]: 151 | draw_gaussian(hm_act[cls_id], ct_int, radius) 152 | 153 | # h_act = h, w_act = w 154 | # wh_act[p] = 1. 
* w, 1. * h 155 | # ind_act[p] = ct_int[1] * output_w + ct_int[0] 156 | # reg_act_mask[p] = 1 157 | # p += 1 158 | 159 | if ann['obj_bbox'] != []: 160 | for i, obbox in enumerate(ann['obj_bbox']): 161 | o_bbox = self._coco_box_to_bbox(obbox[:4]) 162 | o_act = obbox[4:] 163 | o_bbox = np.array(o_bbox) 164 | if flipped: 165 | o_bbox[[0, 2]] = width - o_bbox[[2, 0]] - 1 166 | o_bbox[:2] = affine_transform(o_bbox[:2], trans_output) 167 | o_bbox[2:] = affine_transform(o_bbox[2:], trans_output) 168 | o_bbox[[0, 2]] = np.clip(o_bbox[[0, 2]], 0, output_w - 1) 169 | o_bbox[[1, 3]] = np.clip(o_bbox[[1, 3]], 0, output_h - 1) 170 | 171 | o_h, o_w = o_bbox[3] - o_bbox[1], o_bbox[2] - o_bbox[0] 172 | 173 | if o_h > 0 and o_w > 0: 174 | # radius = gaussian_radius((math.ceil(o_h), math.ceil(o_w))) 175 | # radius = max(0, int(radius)) 176 | radius = 10 177 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius 178 | 179 | o_ct = np.array( 180 | [(o_bbox[0] + o_bbox[2]) / 2, (o_bbox[1] + o_bbox[3]) / 2], dtype=np.float32) 181 | act_ct = (ct + o_ct) / 2 182 | act_ct_int = act_ct.astype(np.int32) 183 | 184 | h_act, w_act = abs(ct[1] - o_ct[1]), abs(ct[0] - o_ct[0]) 185 | wh_act[p] = 1. * w_act, 1. * h_act 186 | ind_act[p] = act_ct_int[1] * output_w + act_ct_int[0] 187 | reg_act_mask[p] = 1 188 | p += 1 189 | 190 | for cls_id in o_act: 191 | draw_gaussian(hm_act[cls_id], act_ct_int, radius) 192 | 193 | ret = {'input': inp, 'hm_act': hm_act, 'wh_act':wh_act, 'ind_act':ind_act, 'reg_act_mask':reg_act_mask} 194 | 195 | if self.opt.dense_wh: 196 | hm_a = hm.max(axis=0, keepdims=True) 197 | dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) 198 | ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) 199 | ret.pop('wh', None) # 'wh' is never added to ret in this HOI variant; pop avoids a KeyError 200 | elif self.opt.cat_spec_wh: 201 | ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask}) 202 | ret.pop('wh', None) 203 | if self.opt.reg_offset: 204 | ret.update({'reg': reg}) 205 | if self.opt.debug > 0 or not self.split == 'train': 206 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 207 | np.zeros((1, 6), dtype=np.float32) 208 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 209 | ret['meta'] = meta 210 | return ret 211 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | import boto3 16 | import io 17 | from PIL import Image 18 | 19 | host = "http://oss.wuhu-a.brainpp.cn" 20 | s3_client = boto3.client('s3', endpoint_url=host) 21 | 22 | 23 | class BaseDetector(object): 24 | def __init__(self, opt): 25 | if opt.gpus[0] >= 0: 26 | opt.device = torch.device('cuda') 27 | else: 28 | opt.device = torch.device('cpu') 29 | 30 | print('Creating model...') 31 | self.model = create_model(opt.arch, opt.heads, opt.head_conv) 32 | self.model = load_model(self.model, opt.load_model) 33 | self.model = self.model.to(opt.device) 34 | self.model.eval() 35 | 36 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 37 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 38 | self.max_per_image = 100
39 | self.num_obj_classes = opt.num_obj_classes 40 | self.num_act_classes = opt.num_act_classes 41 | self.scales = opt.test_scales 42 | self.opt = opt 43 | self.pause = True 44 | 45 | def pre_process(self, image, scale, meta=None): 46 | height, width = image.shape[0:2] 47 | new_height = int(height * scale) 48 | new_width = int(width * scale) 49 | if self.opt.fix_res: 50 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 51 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 52 | s = max(height, width) * 1.0 53 | else: 54 | inp_height = (new_height | self.opt.pad) + 1 55 | inp_width = (new_width | self.opt.pad) + 1 56 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 57 | s = np.array([inp_width, inp_height], dtype=np.float32) 58 | 59 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 60 | resized_image = cv2.resize(image, (new_width, new_height)) 61 | inp_image = cv2.warpAffine( 62 | resized_image, trans_input, (inp_width, inp_height), 63 | flags=cv2.INTER_LINEAR) 64 | inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32) 65 | 66 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 67 | if self.opt.flip_test: 68 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 69 | images = torch.from_numpy(images) 70 | meta = {'c': c, 's': s, 71 | 'out_height': inp_height // self.opt.down_ratio, 72 | 'out_width': inp_width // self.opt.down_ratio} 73 | return images, meta 74 | 75 | def process(self, images, return_time=False): 76 | raise NotImplementedError 77 | 78 | def post_process(self, dets_act, meta, scale=1): 79 | raise NotImplementedError 80 | 81 | def merge_outputs(self, detections): 82 | raise NotImplementedError 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | raise NotImplementedError 86 | 87 | def show_results(self, debugger, image, results): 88 | raise NotImplementedError 89 | 90 | def run(self, img_path, meta=None): 91 | pre_processed = False 92 | # if isinstance(image_or_path_or_tensor, np.ndarray): 93 | # image = image_or_path_or_tensor 94 | # elif type(image_or_path_or_tensor) == type (''): 95 | # image = cv2.imread(image_or_path_or_tensor) 96 | # else: 97 | # image = image_or_path_or_tensor['image'][0].numpy() 98 | # pre_processed_images = image_or_path_or_tensor 99 | # pre_processed = True 100 | 101 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path) 102 | image = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32) 103 | 104 | results = [] 105 | for scale in self.scales: 106 | scale_start_time = time.time() 107 | if not pre_processed: 108 | images, meta = self.pre_process(image, scale, meta) 109 | else: 110 | # import pdb; pdb.set_trace() 111 | images = pre_processed_images['images'][scale][0] 112 | meta = pre_processed_images['meta'][scale] 113 | meta = {k: v.numpy()[0] for k, v in meta.items()} 114 | images = images.to(self.opt.device) 115 | 116 | # print(images.shape) 117 | 118 | output, dets_act, forward_time = self.process(images, return_time=True) 119 | 120 | dets_act = self.post_process(dets_act, meta, scale) 121 | 122 | results.append(dets_act) 123 | 124 | return results 125 | -------------------------------------------------------------------------------- /src/lib/detectors/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import 
print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | # from external.nms import soft_nms 12 | from models.decode import ctdet_decode 13 | from models.utils import flip_tensor 14 | from utils.image import get_affine_transform 15 | from utils.post_process import ctdet_post_process 16 | from utils.debugger import Debugger 17 | 18 | from .base_detector import BaseDetector 19 | 20 | 21 | class CtdetDetector(BaseDetector): 22 | def __init__(self, opt): 23 | super(CtdetDetector, self).__init__(opt) 24 | 25 | def process(self, images, return_time=False): 26 | with torch.no_grad(): 27 | output = self.model(images)[-1] 28 | hm_act = output['hm_act_f'].sigmoid_() 29 | reg_act = None 30 | wh_act = output['wh_act'] 31 | if self.opt.flip_test: 32 | hm_act = (hm_act[0:1] + flip_tensor(hm_act[1:2])) / 2 33 | wh_act = (wh_act[0:1] + flip_tensor(wh_act[1:2])) / 2 34 | torch.cuda.synchronize() 35 | forward_time = time.time() 36 | dets_act = ctdet_decode(hm_act, wh_act, reg_act=reg_act, K=self.opt.K) 37 | 38 | if return_time: 39 | return output, dets_act, forward_time 40 | else: 41 | return output, dets_act 42 | 43 | 44 | def post_process(self, dets_act, meta, scale=1): 45 | dets_act = dets_act.detach().cpu().numpy() 46 | 47 | dets_act = dets_act.reshape(1, -1, dets_act.shape[2]) 48 | 49 | dets_act = ctdet_post_process( 50 | dets_act.copy(), [meta['c']], [meta['s']], 51 | meta['out_height'], meta['out_width'], self.opt.num_obj_classes, self.opt.num_act_classes) 52 | # print(dets_act) 53 | 54 | # for j in range(1, self.num_obj_classes + 1): 55 | # dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 56 | # dets[0][j][:, :4] /= scale 57 | for j in range(1, self.num_act_classes + 1): 58 | dets_act[0][j] = np.array(dets_act[0][j], dtype=np.float32).reshape(-1, 7) 59 | dets_act[0][j][:, :6] /= scale 60 | 61 | # print(dets_act[0]) 62 | return dets_act[0] 63 | 64 | def merge_outputs(self, detections): 65 | results = {} 66 | for j in range(1, self.num_obj_classes + 1): 67 | results[j] = np.concatenate( 68 | [detection[j] for detection in detections], axis=0).astype(np.float32) 69 | if len(self.scales) > 1 or self.opt.nms: 70 | soft_nms(results[j], Nt=0.5, method=2) 71 | scores = np.hstack( 72 | [results[j][:, 4] for j in range(1, self.num_obj_classes + 1)]) 73 | if len(scores) > self.max_per_image: 74 | kth = len(scores) - self.max_per_image 75 | thresh = np.partition(scores, kth)[kth] 76 | for j in range(1, self.num_obj_classes + 1): 77 | keep_inds = (results[j][:, 4] >= thresh) 78 | results[j] = results[j][keep_inds] 79 | return results 80 | 81 | def debug(self, debugger, images, dets, output, scale=1): 82 | detection = dets.detach().cpu().numpy().copy() 83 | detection[:, :, :4] *= self.opt.down_ratio 84 | for i in range(1): 85 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 86 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 87 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 88 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale)) 89 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale)) 90 | for k in range(len(dets[i])): 91 | if detection[i, k, 4] > self.opt.center_thresh: 92 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 93 | detection[i, k, 4], 94 | img_id='out_pred_{:.1f}'.format(scale)) 95 | 96 | def show_results(self, debugger, image, results): 97 | debugger.add_img(image, img_id='ctdet') 98 | for j in range(1, 
self.num_obj_classes + 1): 99 | for bbox in results[j]: 100 | if bbox[4] > self.opt.vis_thresh: 101 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet') 102 | debugger.show_all_imgs(pause=self.pause) 103 | -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetDetector 6 | 7 | detector_factory = { 8 | 'ctdet': CtdetDetector 9 | } 10 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 
36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
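Unlike the stock torch.nn.DataParallel, this variant also accepts a
``chunk_sizes`` list giving the per-GPU batch split, so the first GPU can
be assigned a smaller chunk. A sketch of the intended use (the sizes are
illustrative, one entry per device; the module-level DataParallel factory
below falls back to torch.nn.DataParallel when chunk_sizes is None or
uniform):

>>> net = _DataParallel(model, device_ids=[0, 1, 2], chunk_sizes=[4, 10, 10])
>>> output = net(input_var) # GPU 0 gets 4 samples, GPUs 1 and 2 get 10 each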
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output. Use -1 to indicate the CPU.
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/decode.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import numpy as np 8 | from .utils import _gather_feat, _tranpose_and_gather_feat 9 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 10 | 11 | 12 | def _nms(heat, kernel=3): 13 | pad = (kernel - 1) // 2 14 | 15 | hmax = nn.functional.max_pool2d( 16 | heat, (kernel, kernel), stride=1, padding=pad) 17 | keep = (hmax == heat).float() 18 | return heat * keep 19 | 20 | 21 | def _left_aggregate(heat): 22 | ''' 23 | heat: batchsize x channels x h x w 24 | ''' 25 | shape = heat.shape 26 | heat = heat.reshape(-1, heat.shape[3]) 27 | heat = heat.transpose(1, 0).contiguous() 28 | ret = heat.clone() 29 | for i in range(1, heat.shape[0]): 30 | inds = (heat[i] >= heat[i - 1]) 31 | ret[i] += ret[i - 1] * inds.float() 32 | return (ret - heat).transpose(1, 0).reshape(shape) 33 | 34 | 35 | def _right_aggregate(heat): 36 | ''' 37 | heat: batchsize x channels x h x w 38 | ''' 39 | shape = heat.shape 40 | heat = heat.reshape(-1, heat.shape[3]) 41 | heat = heat.transpose(1, 0).contiguous() 42 | ret = heat.clone() 43 | for i in range(heat.shape[0] - 2, -1, -1): 44 | inds = (heat[i] >= heat[i +1]) 45 | ret[i] += ret[i + 1] * inds.float() 46 | return (ret - heat).transpose(1, 0).reshape(shape) 47 | 48 | 49 | def _top_aggregate(heat): 50 | ''' 51 | heat: batchsize x channels x h x w 52 | ''' 53 | heat = heat.transpose(3, 2) 54 | shape = heat.shape 55 | heat = heat.reshape(-1, heat.shape[3]) 56 | heat = heat.transpose(1, 0).contiguous() 57 | ret = heat.clone() 58 | for i in range(1, heat.shape[0]): 59 | inds = (heat[i] >= heat[i - 1]) 60 | ret[i] += ret[i - 1] * inds.float() 61 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2) 62 | 63 | 64 | def _bottom_aggregate(heat): 65 | ''' 66 | heat: batchsize x channels x h x w 67 | ''' 68 | heat = heat.transpose(3, 2) 69 | shape = heat.shape 70 | heat = heat.reshape(-1, heat.shape[3]) 71 | 
heat = heat.transpose(1, 0).contiguous() 72 | ret = heat.clone() 73 | for i in range(heat.shape[0] - 2, -1, -1): 74 | inds = (heat[i] >= heat[i + 1]) 75 | ret[i] += ret[i + 1] * inds.float() 76 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2) 77 | 78 | 79 | def _h_aggregate(heat, aggr_weight=0.1): 80 | return aggr_weight * _left_aggregate(heat) + \ 81 | aggr_weight * _right_aggregate(heat) + heat 82 | 83 | 84 | def _v_aggregate(heat, aggr_weight=0.1): 85 | return aggr_weight * _top_aggregate(heat) + \ 86 | aggr_weight * _bottom_aggregate(heat) + heat 87 | ''' 88 | # Slow for large number of categories 89 | def _topk(scores, K=40): 90 | batch, cat, height, width = scores.size() 91 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 92 | 93 | topk_clses = (topk_inds / (height * width)).int() 94 | 95 | topk_inds = topk_inds % (height * width) 96 | topk_ys = (topk_inds / width).int().float() 97 | topk_xs = (topk_inds % width).int().float() 98 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 99 | ''' 100 | 101 | 102 | def _topk_channel(scores, K=40): 103 | batch, cat, height, width = scores.size() 104 | 105 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 106 | 107 | topk_inds = topk_inds % (height * width) 108 | topk_ys = (topk_inds / width).int().float() 109 | topk_xs = (topk_inds % width).int().float() 110 | 111 | return topk_scores, topk_inds, topk_ys, topk_xs 112 | 113 | 114 | def _topk(scores, K=40): 115 | batch, cat, height, width = scores.size() 116 | 117 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 118 | 119 | topk_inds = topk_inds % (height * width) 120 | topk_ys = (topk_inds / width).int().float() 121 | topk_xs = (topk_inds % width).int().float() 122 | 123 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 124 | topk_clses = (topk_ind / K).int() 125 | topk_inds = _gather_feat( 126 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 127 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 128 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 129 | 130 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 131 | 132 | 133 | def ctdet_decode(hm_act, wh_act, reg_act=None, K=100): 134 | batch, cat, height, width = hm_act.size() 135 | 136 | hm_act = _nms(hm_act, kernel=3) # need to adjust the kernel size. 
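# (added commentary) _nms above keeps only local maxima of the interaction-
# point heatmap: a 3x3 max-pool is compared element-wise with the input and
# all non-peak positions are zeroed, so the top-K selection below picks
# distinct peaks rather than near-duplicate neighbours; this stands in for
# a box-level NMS on point detections.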
137 | 138 | scores_act, inds_act, clses_act, ys_act, xs_act = _topk(hm_act, K=K) 139 | 140 | if reg_act is not None: 141 | reg_act = _tranpose_and_gather_feat(reg_act, inds_act) 142 | reg_act = reg_act.view(batch, K, 2) 143 | xs_act = xs_act.view(batch, K, 1) + reg_act[:, :, 0:1] 144 | ys_act = ys_act.view(batch, K, 1) + reg_act[:, :, 1:2] 145 | else: 146 | xs_act = xs_act.view(batch, K, 1) + 0.5 147 | ys_act = ys_act.view(batch, K, 1) + 0.5 148 | 149 | wh_act = _tranpose_and_gather_feat(wh_act, inds_act) 150 | 151 | wh_act = wh_act.view(batch, K, 2) 152 | 153 | clses_act = clses_act.view(batch, K, 1).float() 154 | scores_act = scores_act.view(batch, K, 1) 155 | 156 | bboxes = torch.cat([xs_act - wh_act[..., 0:1] / 2, 157 | ys_act - wh_act[..., 1:2] / 2, 158 | xs_act + wh_act[..., 0:1] / 2, 159 | ys_act + wh_act[..., 1:2] / 2], dim=2) 160 | 161 | detections_act = torch.cat([xs_act, ys_act, bboxes, scores_act, clses_act], dim=2) 162 | 163 | # print(detections_act.shape) 164 | # print(detections_act) 165 | 166 | return detections_act 167 | -------------------------------------------------------------------------------- /src/lib/models/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _tranpose_and_gather_feat 14 | import torch.nn.functional as F 15 | 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1) 20 | neg_inds = gt.lt(1) 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | 42 | def _neg_loss(pred, gt): 43 | ''' Modified focal loss. Exactly the same as CornerNet. 
44 | Runs faster and costs a little bit more memory
45 | Arguments:
46 | pred (batch x c x h x w)
47 | gt (batch x c x h x w)
48 | '''
49 | pos_inds = gt.eq(1).float()
50 | neg_inds = gt.lt(1).float()
51 |
52 | neg_weights = torch.pow(1 - gt, 4)
53 |
54 | loss = 0
55 |
56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
58 |
59 | num_pos = pos_inds.float().sum()
60 | pos_loss = pos_loss.sum()
61 | neg_loss = neg_loss.sum()
62 |
63 | if num_pos == 0:
64 | loss = loss - neg_loss
65 | else:
66 | loss = loss - (pos_loss + neg_loss) / num_pos
67 | return loss
68 |
69 | def _not_faster_neg_loss(pred, gt):
70 | pos_inds = gt.eq(1).float()
71 | neg_inds = gt.lt(1).float()
72 | num_pos = pos_inds.float().sum()
73 | neg_weights = torch.pow(1 - gt, 4)
74 |
75 | loss = 0
76 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds
77 | weight = neg_weights * neg_inds + pos_inds
78 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
79 | all_loss = all_loss.sum()
80 |
81 | if num_pos > 0:
82 | all_loss /= num_pos
83 | loss -= all_loss
84 | return loss
85 |
86 | def _slow_reg_loss(regr, gt_regr, mask):
87 | num = mask.float().sum()
88 | mask = mask.unsqueeze(2).expand_as(gt_regr)
89 |
90 | regr = regr[mask]
91 | gt_regr = gt_regr[mask]
92 |
93 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
94 | regr_loss = regr_loss / (num + 1e-4)
95 | return regr_loss
96 |
97 | def _reg_loss(regr, gt_regr, mask):
98 | ''' L1 regression loss
99 | Arguments:
100 | regr (batch x max_objects x dim)
101 | gt_regr (batch x max_objects x dim)
102 | mask (batch x max_objects)
103 | '''
104 | num = mask.float().sum()
105 | mask = mask.unsqueeze(2).expand_as(gt_regr).float()
106 |
107 | regr = regr * mask
108 | gt_regr = gt_regr * mask
109 |
110 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
111 | regr_loss = regr_loss / (num + 1e-4)
112 | return regr_loss
113 |
114 | class FocalLoss(nn.Module):
115 | '''nn.Module wrapper for focal loss'''
116 | def __init__(self):
117 | super(FocalLoss, self).__init__()
118 | self.neg_loss = _neg_loss
119 |
120 | def forward(self, out, target):
121 | return self.neg_loss(out, target)
122 |
123 | class RegLoss(nn.Module):
124 | '''Regression loss for an output tensor
125 | Arguments:
126 | output (batch x dim x h x w)
127 | mask (batch x max_objects)
128 | ind (batch x max_objects)
129 | target (batch x max_objects x dim)
130 | '''
131 | def __init__(self):
132 | super(RegLoss, self).__init__()
133 |
134 | def forward(self, output, mask, ind, target):
135 | pred = _tranpose_and_gather_feat(output, ind)
136 | loss = _reg_loss(pred, target, mask)
137 | return loss
138 |
139 | class RegL1Loss(nn.Module):
140 | def __init__(self):
141 | super(RegL1Loss, self).__init__()
142 |
143 | def forward(self, output, mask, ind, target):
144 | pred = _tranpose_and_gather_feat(output, ind)
145 | mask = mask.unsqueeze(2).expand_as(pred).float()
146 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
147 | loss = F.l1_loss(pred * mask, target * mask, size_average=False)
148 | loss = loss / (mask.sum() + 1e-4)
149 | return loss
150 |
151 | class NormRegL1Loss(nn.Module):
152 | def __init__(self):
153 | super(NormRegL1Loss, self).__init__()
154 |
155 | def forward(self, output, mask, ind, target):
156 | pred = _tranpose_and_gather_feat(output, ind)
157 | mask = mask.unsqueeze(2).expand_as(pred).float()
158
| # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 159 | pred = pred / (target + 1e-4) 160 | target = target * 0 + 1 161 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 162 | loss = loss / (mask.sum() + 1e-4) 163 | return loss 164 | 165 | class RegWeightedL1Loss(nn.Module): 166 | def __init__(self): 167 | super(RegWeightedL1Loss, self).__init__() 168 | 169 | def forward(self, output, mask, ind, target): 170 | pred = _tranpose_and_gather_feat(output, ind) 171 | mask = mask.float() 172 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 173 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 174 | loss = loss / (mask.sum() + 1e-4) 175 | return loss 176 | 177 | class L1Loss(nn.Module): 178 | def __init__(self): 179 | super(L1Loss, self).__init__() 180 | 181 | def forward(self, output, mask, ind, target): 182 | pred = _tranpose_and_gather_feat(output, ind) 183 | mask = mask.unsqueeze(2).expand_as(pred).float() 184 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 185 | return loss 186 | 187 | class BinRotLoss(nn.Module): 188 | def __init__(self): 189 | super(BinRotLoss, self).__init__() 190 | 191 | def forward(self, output, mask, ind, rotbin, rotres): 192 | pred = _tranpose_and_gather_feat(output, ind) 193 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 194 | return loss 195 | 196 | def compute_res_loss(output, target): 197 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 198 | 199 | # TODO: weight 200 | def compute_bin_loss(output, target, mask): 201 | mask = mask.expand_as(output) 202 | output = output * mask.float() 203 | return F.cross_entropy(output, target, reduction='elementwise_mean') 204 | 205 | def compute_rot_loss(output, target_bin, target_res, mask): 206 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 207 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 208 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 209 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 210 | # mask: (B, 128, 1) 211 | # import pdb; pdb.set_trace() 212 | output = output.view(-1, 8) 213 | target_bin = target_bin.view(-1, 2) 214 | target_res = target_res.view(-1, 2) 215 | mask = mask.view(-1, 1) 216 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 217 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 218 | loss_res = torch.zeros_like(loss_bin1) 219 | if target_bin[:, 0].nonzero().shape[0] > 0: 220 | idx1 = target_bin[:, 0].nonzero()[:, 0] 221 | valid_output1 = torch.index_select(output, 0, idx1.long()) 222 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 223 | loss_sin1 = compute_res_loss( 224 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 225 | loss_cos1 = compute_res_loss( 226 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 227 | loss_res += loss_sin1 + loss_cos1 228 | if target_bin[:, 1].nonzero().shape[0] > 0: 229 | idx2 = target_bin[:, 1].nonzero()[:, 0] 230 | valid_output2 = torch.index_select(output, 0, idx2.long()) 231 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 232 | loss_sin2 = compute_res_loss( 233 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 234 | loss_cos2 = compute_res_loss( 235 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 236 | loss_res += loss_sin2 + loss_cos2 237 | return loss_bin1 + loss_bin2 + loss_res 238 | -------------------------------------------------------------------------------- 
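The losses above plug into training as plain modules: `FocalLoss` on the sigmoid-activated heatmaps and `RegL1Loss` on values gathered at per-object indices. A minimal usage sketch (the shapes, dummy values, and import path are illustrative assumptions, not taken from this repository):

```python
import torch
from models.losses import FocalLoss, RegL1Loss  # assumed import path for this sketch

focal, reg_l1 = FocalLoss(), RegL1Loss()

# Heatmap head: predictions must already be sigmoid-activated and clamped inside (0, 1),
# otherwise torch.log() in _neg_loss produces -inf/nan.
pred_hm = torch.rand(2, 80, 128, 128).clamp(1e-4, 1 - 1e-4)
gt_hm = torch.zeros(2, 80, 128, 128)
gt_hm[0, 3, 64, 64] = 1.0  # one positive peak; Gaussian neighbours would lie in (0, 1)
hm_loss = focal(pred_hm, gt_hm)

# Regression head: values are gathered at flat spatial indices (y * W + x), one slot
# per potential object, with a mask marking the slots that hold real objects.
wh_map = torch.randn(2, 2, 128, 128)         # e.g. a width/height prediction map
ind = torch.zeros(2, 128, dtype=torch.long)  # flat indices per object slot
mask = torch.zeros(2, 128)
mask[0, 0] = 1.0                             # only the first slot is a real object
target = torch.zeros(2, 128, 2)
wh_loss = reg_l1(wh_map, mask, ind, target)
print(hm_loss.item(), wh_loss.item())
```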
/src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | # from .networks.msra_resnet import get_pose_net 11 | # from .networks.dlav0 import get_pose_net as get_dlav0 12 | # from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | # from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | # 'res': get_pose_net, # default Resnet with deconv 18 | # 'dlav0': get_dlav0, # default DLAup 19 | # 'dla': get_dla_dcn, 20 | # 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | 25 | def create_model(arch, heads, head_conv): 26 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 27 | arch = arch[:arch.find('_')] if '_' in arch else arch 28 | get_model = _model_factory[arch] 29 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv) 30 | return model 31 | 32 | 33 | # def load_model(model, model_path, optimizer=None, resume=False, 34 | # lr=None, lr_step=None): 35 | # start_epoch = 0 36 | # checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 37 | # print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 38 | # state_dict_ = checkpoint['state_dict'] 39 | # state_dict = {} 40 | # 41 | # # convert data_parallal to model 42 | # for k in state_dict_: 43 | # if k.startswith('module') and not k.startswith('module_list'): 44 | # state_dict[k[7:]] = state_dict_[k] 45 | # else: 46 | # state_dict[k] = state_dict_[k] 47 | # model_state_dict = model.state_dict() 48 | # 49 | # # check loaded parameters and created model parameters 50 | # for k in state_dict: 51 | # if k in model_state_dict: 52 | # if state_dict[k].shape != model_state_dict[k].shape: 53 | # print('Skip loading parameter {}, required shape{}, '\ 54 | # 'loaded shape{}.'.format( 55 | # k, model_state_dict[k].shape, state_dict[k].shape)) 56 | # state_dict[k] = model_state_dict[k] 57 | # else: 58 | # print('Drop parameter {}.'.format(k)) 59 | # for k in model_state_dict: 60 | # if not (k in state_dict): 61 | # print('No param {}.'.format(k)) 62 | # state_dict[k] = model_state_dict[k] 63 | # model.load_state_dict(state_dict, strict=False) 64 | # 65 | # # resume optimizer parameters 66 | # if optimizer is not None and resume: 67 | # if 'optimizer' in checkpoint: 68 | # optimizer.load_state_dict(checkpoint['optimizer']) 69 | # start_epoch = checkpoint['epoch'] 70 | # start_lr = lr 71 | # for step in lr_step: 72 | # if start_epoch >= step: 73 | # start_lr *= 0.1 74 | # for param_group in optimizer.param_groups: 75 | # param_group['lr'] = start_lr 76 | # print('Resumed optimizer with start lr', start_lr) 77 | # else: 78 | # print('No optimizer parameters in checkpoint.') 79 | # if optimizer is not None: 80 | # return model, optimizer, start_epoch 81 | # else: 82 | # return model 83 | 84 | def load_model(model, model_path): 85 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 86 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 87 | state_dict_ = checkpoint['state_dict'] 88 | state_dict = {} 89 | 90 | # convert data_parallal to model 91 | for k in state_dict_: 92 | if k.startswith('module') and not 
k.startswith('module_list'): 93 | state_dict[k[7:]] = state_dict_[k] 94 | else: 95 | state_dict[k] = state_dict_[k] 96 | model_state_dict = model.state_dict() 97 | 98 | # check loaded parameters and created model parameters 99 | for k in state_dict: 100 | if k in model_state_dict: 101 | if state_dict[k].shape != model_state_dict[k].shape: 102 | print('Skip loading parameter {}, required shape{}, '\ 103 | 'loaded shape{}.'.format( 104 | k, model_state_dict[k].shape, state_dict[k].shape)) 105 | state_dict[k] = model_state_dict[k] 106 | else: 107 | print('Drop parameter {}.'.format(k)) 108 | for k in model_state_dict: 109 | if not (k in state_dict): 110 | print('No param {}.'.format(k)) 111 | state_dict[k] = model_state_dict[k] 112 | model.load_state_dict(state_dict, strict=False) 113 | 114 | return model 115 | 116 | 117 | def save_model(path, epoch, model, optimizer=None): 118 | if isinstance(model, torch.nn.DataParallel): 119 | state_dict = model.module.state_dict() 120 | else: 121 | state_dict = model.state_dict() 122 | data = {'epoch': epoch, 123 | 'state_dict': state_dict} 124 | if not (optimizer is None): 125 | data['optimizer'] = optimizer.state_dict() 126 | torch.save(data, path) 127 | 128 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with PyTorch
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 |
42 | ### Known Issues:
43 |
44 | - [x] Gradient check w.r.t. offset (solved)
45 | - [ ] Backward is not reentrant (minor)
46 |
47 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
48 |
49 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
50 | However, when I set the offset to 0.5, it passes. I am still wondering what causes this problem. Could it be due to some
51 | non-differentiable points?
52 |
53 | Update: all gradient checks pass with double precision.
54 |
55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
56 | float, `<1e-15` for double),
57 | so it may not be a serious problem.
58 |
59 | Please post an issue or PR if you have any comments.
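### Reproducing the double-precision check

A sketch following `check_gradient_dconv_double` in this repo's `test.py` (it assumes the
double backend was built with `build_double.py` and that `dcn_v2_func.py` imports
`_ext.dcn_v2_double` instead of `_ext.dcn_v2`; CUDA is required):

```python
import torch
from torch.autograd import gradcheck
from dcn_v2_func import DCNv2Function

deformable_groups = 1
N, inC, inH, inW = 2, 2, 4, 4
outC, kH, kW = 2, 3, 3

# double precision keeps gradcheck's finite differences numerically reliable
input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kH * kW, inH, inW, dtype=torch.float64).cuda()
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * kH * kW, inH, inW, dtype=torch.float64).cuda()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda()
weight.requires_grad = True
bias = torch.rand(outC, dtype=torch.float64).cuda()
bias.requires_grad = True

func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3))
```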
60 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/models/networks/DCNv2/__init__.py -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/dcn_v2.c'] 8 | headers = ['src/dcn_v2.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | extra_objects = [] 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/dcn_v2_cuda.c'] 16 | headers += ['src/dcn_v2_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o'] 19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o'] 20 | with_cuda = True 21 | else: 22 | raise ValueError('CUDA is not available') 23 | 24 | extra_compile_args = ['-fopenmp', '-std=c99'] 25 | 26 | this_file = os.path.dirname(os.path.realpath(__file__)) 27 | print(this_file) 28 | sources = [os.path.join(this_file, fname) for fname in sources] 29 | headers = [os.path.join(this_file, fname) for fname in headers] 30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 31 | 32 | 33 | # ffi = BuildExtension( 34 | ffi = create_extension( 35 | '_ext.dcn_v2', 36 | headers=headers, 37 | sources=sources, 38 | define_macros=defines, 39 | relative_to=__file__, 40 | with_cuda=with_cuda, 41 | extra_objects=extra_objects, 42 | extra_compile_args=extra_compile_args 43 | ) 44 | 45 | if __name__ == '__main__': 46 | ffi.build() 47 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build_double.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/dcn_v2_double.c'] 8 | headers = ['src/dcn_v2_double.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | extra_objects = [] 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/dcn_v2_cuda_double.c'] 16 | headers += ['src/dcn_v2_cuda_double.h'] 17 | defines += [('WITH_CUDA', None)] 18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o'] 19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o'] 20 | with_cuda = True 21 | else: 22 | raise ValueError('CUDA is not available') 23 | 24 | extra_compile_args = ['-fopenmp', '-std=c99'] 25 | 26 | this_file = os.path.dirname(os.path.realpath(__file__)) 27 | print(this_file) 28 | sources = [os.path.join(this_file, fname) for fname in sources] 29 | headers = [os.path.join(this_file, fname) for fname in headers] 30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 31 | 32 | # ffi = BuildExtension( 33 | ffi = create_extension( 34 | '_ext.dcn_v2_double', 35 | headers=headers, 36 | sources=sources, 37 | define_macros=defines, 38 | relative_to=__file__, 39 | with_cuda=with_cuda, 40 | extra_objects=extra_objects, 41 | extra_compile_args=extra_compile_args 42 | ) 43 | 44 | if __name__ == '__main__': 45 | 
ffi.build() 46 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/dcn_v2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import torch 7 | import math 8 | from torch import nn 9 | from torch.nn.modules.utils import _pair 10 | 11 | from .dcn_v2_func import DCNv2Function 12 | from .dcn_v2_func import DCNv2PoolingFunction 13 | 14 | class DCNv2(nn.Module): 15 | 16 | def __init__(self, in_channels, out_channels, 17 | kernel_size, stride, padding, dilation=1, deformable_groups=1): 18 | super(DCNv2, self).__init__() 19 | self.in_channels = in_channels 20 | self.out_channels = out_channels 21 | self.kernel_size = _pair(kernel_size) 22 | self.stride = stride 23 | self.padding = padding 24 | self.dilation = dilation 25 | self.deformable_groups = deformable_groups 26 | 27 | self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size)) 28 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 29 | self.reset_parameters() 30 | 31 | def reset_parameters(self): 32 | n = self.in_channels 33 | for k in self.kernel_size: 34 | n *= k 35 | stdv = 1. / math.sqrt(n) 36 | self.weight.data.uniform_(-stdv, stdv) 37 | self.bias.data.zero_() 38 | 39 | def forward(self, input, offset, mask): 40 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups) 41 | return func(input, offset, mask, self.weight, self.bias) 42 | 43 | 44 | class DCN(DCNv2): 45 | 46 | def __init__(self, in_channels, out_channels, 47 | kernel_size, stride, padding, 48 | dilation=1, deformable_groups=1): 49 | super(DCN, self).__init__(in_channels, out_channels, 50 | kernel_size, stride, padding, dilation, deformable_groups) 51 | 52 | self.conv_offset_mask = nn.Conv2d(self.in_channels, 53 | self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1], 54 | kernel_size=self.kernel_size, 55 | stride=(self.stride, self.stride), 56 | padding=(self.padding, self.padding), 57 | bias=True) 58 | self.init_offset() 59 | 60 | def init_offset(self): 61 | self.conv_offset_mask.weight.data.zero_() 62 | self.conv_offset_mask.bias.data.zero_() 63 | 64 | def forward(self, input): 65 | out = self.conv_offset_mask(input) 66 | o1, o2, mask = torch.chunk(out, 3, dim=1) 67 | offset = torch.cat((o1, o2), dim=1) 68 | mask = torch.sigmoid(mask) 69 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups) 70 | return func(input, offset, mask, self.weight, self.bias) 71 | 72 | 73 | class DCNv2Pooling(nn.Module): 74 | 75 | def __init__(self, 76 | spatial_scale, 77 | pooled_size, 78 | output_dim, 79 | no_trans, 80 | group_size=1, 81 | part_size=None, 82 | sample_per_part=4, 83 | trans_std=.0): 84 | super(DCNv2Pooling, self).__init__() 85 | self.spatial_scale = spatial_scale 86 | self.pooled_size = pooled_size 87 | self.output_dim = output_dim 88 | self.no_trans = no_trans 89 | self.group_size = group_size 90 | self.part_size = pooled_size if part_size is None else part_size 91 | self.sample_per_part = sample_per_part 92 | self.trans_std = trans_std 93 | self.func = DCNv2PoolingFunction(self.spatial_scale, 94 | self.pooled_size, 95 | self.output_dim, 96 | self.no_trans, 97 | self.group_size, 98 | self.part_size, 99 | self.sample_per_part, 100 | self.trans_std) 101 | 102 | def forward(self, data, rois, offset): 103 | 104 | if 
self.no_trans: 105 | offset = data.new() 106 | return self.func(data, rois, offset) 107 | 108 | class DCNPooling(DCNv2Pooling): 109 | 110 | def __init__(self, 111 | spatial_scale, 112 | pooled_size, 113 | output_dim, 114 | no_trans, 115 | group_size=1, 116 | part_size=None, 117 | sample_per_part=4, 118 | trans_std=.0, 119 | deform_fc_dim=1024): 120 | super(DCNPooling, self).__init__(spatial_scale, 121 | pooled_size, 122 | output_dim, 123 | no_trans, 124 | group_size, 125 | part_size, 126 | sample_per_part, 127 | trans_std) 128 | 129 | self.deform_fc_dim = deform_fc_dim 130 | 131 | if not no_trans: 132 | self.func_offset = DCNv2PoolingFunction(self.spatial_scale, 133 | self.pooled_size, 134 | self.output_dim, 135 | True, 136 | self.group_size, 137 | self.part_size, 138 | self.sample_per_part, 139 | self.trans_std) 140 | self.offset_fc = nn.Sequential( 141 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), 142 | nn.ReLU(inplace=True), 143 | nn.Linear(self.deform_fc_dim, self.deform_fc_dim), 144 | nn.ReLU(inplace=True), 145 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 2) 146 | ) 147 | self.offset_fc[4].weight.data.zero_() 148 | self.offset_fc[4].bias.data.zero_() 149 | self.mask_fc = nn.Sequential( 150 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), 151 | nn.ReLU(inplace=True), 152 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 1), 153 | nn.Sigmoid() 154 | ) 155 | self.mask_fc[2].weight.data.zero_() 156 | self.mask_fc[2].bias.data.zero_() 157 | 158 | def forward(self, data, rois): 159 | if self.no_trans: 160 | offset = data.new() 161 | else: 162 | n = rois.shape[0] 163 | offset = data.new() 164 | x = self.func_offset(data, rois, offset) 165 | offset = self.offset_fc(x.view(n, -1)) 166 | offset = offset.view(n, 2, self.pooled_size, self.pooled_size) 167 | mask = self.mask_fc(x.view(n, -1)) 168 | mask = mask.view(n, 1, self.pooled_size, self.pooled_size) 169 | feat = self.func(data, rois, offset) * mask 170 | return feat 171 | return self.func(data, rois, offset) 172 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/dcn_v2_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import torch 7 | from torch.autograd import Function 8 | 9 | from ._ext import dcn_v2 as _backend 10 | # from _ext import dcn_v2_double as _backend 11 | 12 | 13 | class DCNv2Function(Function): 14 | 15 | def __init__(self, stride, padding, dilation=1, deformable_groups=1): 16 | super(DCNv2Function, self).__init__() 17 | self.stride = stride 18 | self.padding = padding 19 | self.dilation = dilation 20 | self.deformable_groups = deformable_groups 21 | 22 | def forward(self, input, offset, mask, weight, bias): 23 | if not input.is_cuda: 24 | raise NotImplementedError 25 | if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: 26 | self.save_for_backward(input, offset, mask, weight, bias) 27 | output = input.new(*self._infer_shape(input, weight)) 28 | self._bufs = [input.new(), input.new()] 29 | _backend.dcn_v2_cuda_forward(input, weight, 30 | bias, self._bufs[0], 31 | offset, mask, 32 | output, self._bufs[1], 33 | weight.shape[2], weight.shape[3], 34 | self.stride, self.stride, 35 | self.padding, self.padding, 36 | 
self.dilation, self.dilation, 37 | self.deformable_groups) 38 | return output 39 | 40 | def backward(self, grad_output): 41 | if not grad_output.is_cuda: 42 | raise NotImplementedError 43 | input, offset, mask, weight, bias = self.saved_tensors 44 | grad_input = input.new(*input.size()).zero_() 45 | grad_offset = offset.new(*offset.size()).zero_() 46 | grad_mask = mask.new(*mask.size()).zero_() 47 | grad_weight = weight.new(*weight.size()).zero_() 48 | grad_bias = bias.new(*bias.size()).zero_() 49 | _backend.dcn_v2_cuda_backward(input, weight, 50 | bias, self._bufs[0], 51 | offset, mask, 52 | self._bufs[1], 53 | grad_input, grad_weight, 54 | grad_bias, grad_offset, 55 | grad_mask, grad_output, 56 | weight.shape[2], weight.shape[3], 57 | self.stride, self.stride, 58 | self.padding, self.padding, 59 | self.dilation, self.dilation, 60 | self.deformable_groups) 61 | 62 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias 63 | 64 | def _infer_shape(self, input, weight): 65 | n = input.size(0) 66 | channels_out = weight.size(0) 67 | height, width = input.shape[2:4] 68 | kernel_h, kernel_w = weight.shape[2:4] 69 | height_out = (height + 2 * self.padding - 70 | (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1 71 | width_out = (width + 2 * self.padding - (self.dilation * 72 | (kernel_w - 1) + 1)) // self.stride + 1 73 | return (n, channels_out, height_out, width_out) 74 | 75 | 76 | class DCNv2PoolingFunction(Function): 77 | 78 | def __init__(self, 79 | spatial_scale, 80 | pooled_size, 81 | output_dim, 82 | no_trans, 83 | group_size=1, 84 | part_size=None, 85 | sample_per_part=4, 86 | trans_std=.0): 87 | super(DCNv2PoolingFunction, self).__init__() 88 | self.spatial_scale = spatial_scale 89 | self.pooled_size = pooled_size 90 | self.output_dim = output_dim 91 | self.no_trans = no_trans 92 | self.group_size = group_size 93 | self.part_size = pooled_size if part_size is None else part_size 94 | self.sample_per_part = sample_per_part 95 | self.trans_std = trans_std 96 | 97 | assert self.trans_std >= 0.0 and self.trans_std <= 1.0 98 | 99 | def forward(self, data, rois, offset): 100 | if not data.is_cuda: 101 | raise NotImplementedError 102 | 103 | output = data.new(*self._infer_shape(data, rois)) 104 | output_count = data.new(*self._infer_shape(data, rois)) 105 | _backend.dcn_v2_psroi_pooling_cuda_forward(data, rois, offset, 106 | output, output_count, 107 | self.no_trans, self.spatial_scale, 108 | self.output_dim, self.group_size, 109 | self.pooled_size, self.part_size, 110 | self.sample_per_part, self.trans_std) 111 | 112 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 113 | self.save_for_backward(data, rois, offset, output_count) 114 | 115 | return output 116 | 117 | def backward(self, grad_output): 118 | if not grad_output.is_cuda: 119 | raise NotImplementedError 120 | 121 | data, rois, offset, output_count = self.saved_tensors 122 | grad_input = data.new(*data.size()).zero_() 123 | grad_offset = offset.new(*offset.size()).zero_() 124 | 125 | _backend.dcn_v2_psroi_pooling_cuda_backward(grad_output, 126 | data, 127 | rois, 128 | offset, 129 | output_count, 130 | grad_input, 131 | grad_offset, 132 | self.no_trans, 133 | self.spatial_scale, 134 | self.output_dim, 135 | self.group_size, 136 | self.pooled_size, 137 | self.part_size, 138 | self.sample_per_part, 139 | self.trans_std) 140 | return grad_input, None, grad_offset 141 | 142 | def _infer_shape(self, data, rois): 143 | # _, c, h, w = data.shape[:4] 144 | c = data.shape[1] 145 | n = rois.shape[0] 146 | 
return (n, self.output_dim, self.pooled_size, self.pooled_size) 147 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src/cuda 3 | 4 | # compile dcn 5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC 6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC 7 | 8 | # compile dcn-roi-pooling 9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC 10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC 11 | 12 | cd - 13 | python build.py 14 | python build_double.py 15 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 
48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA 64 | #define DCN_V2_IM2COL_CUDA 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const float *data_im, const float *data_offset, const float *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, float *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | float *grad_offset, float *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 
20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 
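 * (As a gloss on the declarations below: im2col unrolls each deformable, mask-modulated
 * sampling window into a column of a matrix so that the convolution reduces to a single
 * matrix multiply; the col2im variants scatter gradients back to the image, the offsets,
 * and the masks.)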
57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE 64 | #define DCN_V2_IM2COL_CUDA_DOUBLE 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const double *data_im, const double *data_offset, const double *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, double *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const double *data_col, const double *data_offset, const double *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, double *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | double *grad_offset, double *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA 11 | #define DCN_V2_PSROI_POOLING_CUDA 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const float *data, 20 | const float *bbox, 21 | const float *trans, 22 | float *out, 23 | float *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const float spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const float trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const float *out_grad, 41 | const float *data, 42 | const float *bbox, 43 | const float *trans, 44 | const float *top_count, 45 | float *in_grad, 46 | float *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE 11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const double *data, 20 | const double *bbox, 21 | const double *trans, 22 | double *out, 23 | double *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const double spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const double trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const double *out_grad, 41 | const double *data, 42 | const double *bbox, 43 | const double *trans, 44 | const double *top_count, 45 | double *in_grad, 46 | double *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const double spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const double trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 6 | THFloatTensor *bias, THFloatTensor *ones, 7 | THFloatTensor *offset, THFloatTensor *mask, 8 | THFloatTensor *output, THFloatTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 17 | THFloatTensor *bias, THFloatTensor *ones, 18 | THFloatTensor *offset, THFloatTensor *mask, 19 | THFloatTensor *output, THFloatTensor *columns, 20 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 22 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 2 | THFloatTensor *bias, THFloatTensor *ones, 3 | THFloatTensor *offset, THFloatTensor *mask, 4 | THFloatTensor *output, THFloatTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int 
deformable_group); 9 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 10 | THFloatTensor *bias, THFloatTensor *ones, 11 | THFloatTensor *offset, THFloatTensor *mask, 12 | THFloatTensor *output, THFloatTensor *columns, 13 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 15 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight, 10 | THCudaTensor *bias, THCudaTensor *ones, 11 | THCudaTensor *offset, THCudaTensor *mask, 12 | THCudaTensor *output, THCudaTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight, 19 | THCudaTensor *bias, THCudaTensor *ones, 20 | THCudaTensor *offset, THCudaTensor *mask, 21 | THCudaTensor *columns, 22 | THCudaTensor *grad_input, THCudaTensor *grad_weight, 23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset, 24 | THCudaTensor *grad_mask, THCudaTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, 32 | THCudaTensor * trans, 33 | THCudaTensor * out, THCudaTensor * top_count, 34 | const int no_trans, 35 | const float spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const float trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad, 44 | THCudaTensor * input, THCudaTensor * bbox, 45 | THCudaTensor * trans, THCudaTensor * top_count, 46 | THCudaTensor * input_grad, THCudaTensor * trans_grad, 47 | const int no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std); 55 | 56 | // #ifdef __cplusplus 57 | // } 58 | // #endif 59 | 60 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 10 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 11 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 12 | THCudaDoubleTensor *output, THCudaDoubleTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int 
dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 19 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 20 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 21 | THCudaDoubleTensor *columns, 22 | THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight, 23 | THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset, 24 | THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 32 | THCudaDoubleTensor * trans, 33 | THCudaDoubleTensor * out, THCudaDoubleTensor * top_count, 34 | const int no_trans, 35 | const double spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const double trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad, 44 | THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 45 | THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count, 46 | THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad, 47 | const int no_trans, 48 | const double spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const double trans_std); 55 | 56 | 57 | // #ifdef __cplusplus 58 | // } 59 | // #endif 60 | 61 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 6 | THDoubleTensor *bias, THDoubleTensor *ones, 7 | THDoubleTensor *offset, THDoubleTensor *mask, 8 | THDoubleTensor *output, THDoubleTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 17 | THDoubleTensor *bias, THDoubleTensor *ones, 18 | THDoubleTensor *offset, THDoubleTensor *mask, 19 | THDoubleTensor *output, THDoubleTensor *columns, 20 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 21 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 22 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 2 | THDoubleTensor *bias, THDoubleTensor *ones, 3 | THDoubleTensor *offset, THDoubleTensor *mask, 4 | THDoubleTensor *output, THDoubleTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | 
const int deformable_group); 9 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 10 | THDoubleTensor *bias, THDoubleTensor *ones, 11 | THDoubleTensor *offset, THDoubleTensor *mask, 12 | THDoubleTensor *output, THDoubleTensor *columns, 13 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 14 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 15 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import DCNv2 12 | from dcn_v2_func import DCNv2Function 13 | from dcn_v2 import DCNv2Pooling 14 | from dcn_v2_func import DCNv2PoolingFunction 15 | 16 | deformable_groups = 1 17 | N, inC, inH, inW = 2, 2, 4, 4 18 | outC = 2 19 | kH, kW = 3, 3 20 | 21 | def conv_identify(weight, bias): 22 | weight.data.zero_() 23 | bias.data.zero_() 24 | o, i, h, w = weight.shape 25 | y = h//2 26 | x = w//2 27 | for p in range(i): 28 | for q in range(o): 29 | if p == q: 30 | weight.data[q, p, y, x] = 1.0 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | 67 | def check_gradient_dconv_double(): 68 | 69 | input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda() 70 | input.requires_grad = True 71 | 72 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW, dtype=torch.float64).cuda() 73 | # offset.data.zero_() 74 | # offset.data -= 0.00001 75 | offset.requires_grad = True 76 | 77 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW, dtype=torch.float64).cuda() 78 | # mask.data.zero_() 79 | mask.requires_grad = True 80 | mask = torch.sigmoid(mask) 81 | 82 | weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda() 83 | weight.requires_grad = True 84 | 85 | bias = torch.rand(outC, dtype=torch.float64).cuda() 86 | bias.requires_grad = True 87 | 88 | func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups) 89 | 90 | print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3)) 91 | 92 | def 
check_gradient_dconv(): 93 | 94 | input = torch.randn(N, inC, inH, inW).cuda() 95 | input.requires_grad = True 96 | 97 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() 98 | # offset.data.zero_() 99 | # offset.data -= 0.5 100 | offset.requires_grad = True 101 | 102 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 103 | # mask.data.zero_() 104 | mask.requires_grad = True 105 | mask = torch.sigmoid(mask) 106 | 107 | weight = torch.randn(outC, inC, kH, kW).cuda() 108 | weight.requires_grad = True 109 | 110 | bias = torch.rand(outC).cuda() 111 | bias.requires_grad = True 112 | 113 | func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups) 114 | 115 | print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-3, atol=1e-3, rtol=1e-2)) 116 | 117 | def check_pooling_zero_offset(): 118 | from dcn_v2 import DCNv2Pooling 119 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 120 | input[0, :, 16:26, 16:26] = 1. 121 | input[1, :, 10:20, 20:30] = 2. 122 | rois = torch.tensor([ 123 | [0, 65, 65, 103, 103], 124 | [1, 81, 41, 119, 79], 125 | ]).cuda().float() 126 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 127 | pooled_size=7, 128 | output_dim=16, 129 | no_trans=True, 130 | group_size=1, 131 | trans_std=0.1).cuda() 132 | 133 | out = pooling(input, rois, input.new()) 134 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 135 | print(s) 136 | 137 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 138 | pooled_size=7, 139 | output_dim=16, 140 | no_trans=False, 141 | group_size=1, 142 | trans_std=0.1).cuda() 143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 144 | dout = dpooling(input, rois, offset) 145 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 146 | print(s) 147 | 148 | def check_gradient_dpooling(): 149 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01 150 | N = 4 151 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 152 | x = torch.rand((N, 1)).cuda().float() * 15 153 | y = torch.rand((N, 1)).cuda().float() * 15 154 | w = torch.rand((N, 1)).cuda().float() * 10 155 | h = torch.rand((N, 1)).cuda().float() * 10 156 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 157 | offset = torch.randn(N, 2, 3, 3).cuda() 158 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 159 | pooled_size=3, 160 | output_dim=3, 161 | no_trans=False, 162 | group_size=1, 163 | trans_std=0.0).cuda() 164 | input.requires_grad = True 165 | offset.requires_grad = True 166 | print('check_gradient_dpooling', gradcheck(dpooling, (input, rois, offset), eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | from dcn_v2 import DCN 171 | input = torch.randn(2, 64, 128, 128).cuda() 172 | # wrap all things (offset and mask) in DCN 173 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 174 | output = dcn(input) 175 | target = output.new(*output.size()) 176 | target.data.uniform_(-0.01, 0.01) 177 | error = (target - output).mean() 178 | error.backward() 179 | print(output.shape) 180 | 181 | def example_dpooling(): 182 | from dcn_v2 import DCNv2Pooling 183 | input = torch.randn(2, 32, 64, 64).cuda() 184 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 185 | x = torch.randint(256, (20, 1)).cuda().float() 186 | y = torch.randint(256, (20, 1)).cuda().float() 187 | w = torch.randint(64, (20, 1)).cuda().float() 188 | h = torch.randint(64, (20, 1)).cuda().float() 189 | rois = torch.cat((batch_inds, x, y, x + w, y + h),
dim=1) 190 | offset = torch.randn(20, 2, 7, 7).cuda() 191 | input.requires_grad = True 192 | offset.requires_grad = True 193 | 194 | # normal roi_align 195 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 196 | pooled_size=7, 197 | output_dim=32, 198 | no_trans=True, 199 | group_size=1, 200 | trans_std=0.1).cuda() 201 | 202 | # deformable pooling 203 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 204 | pooled_size=7, 205 | output_dim=32, 206 | no_trans=False, 207 | group_size=1, 208 | trans_std=0.1).cuda() 209 | 210 | out = pooling(input, rois, offset) 211 | dout = dpooling(input, rois, offset) 212 | print(out.shape) 213 | print(dout.shape) 214 | 215 | target_out = out.new(*out.size()) 216 | target_out.data.uniform_(-0.01, 0.01) 217 | target_dout = dout.new(*dout.size()) 218 | target_dout.data.uniform_(-0.01, 0.01) 219 | e = (target_out - out).mean() 220 | e.backward() 221 | e = (target_dout - dout).mean() 222 | e.backward() 223 | 224 | def example_mdpooling(): 225 | from dcn_v2 import DCNPooling 226 | input = torch.randn(2, 32, 64, 64).cuda() 227 | input.requires_grad = True 228 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 229 | x = torch.randint(256, (20, 1)).cuda().float() 230 | y = torch.randint(256, (20, 1)).cuda().float() 231 | w = torch.randint(64, (20, 1)).cuda().float() 232 | h = torch.randint(64, (20, 1)).cuda().float() 233 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 234 | 235 | # modulated deformable pooling (V2) 236 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 237 | pooled_size=7, 238 | output_dim=32, 239 | no_trans=False, 240 | group_size=1, 241 | trans_std=0.1).cuda() 242 | 243 | dout = dpooling(input, rois) 244 | target = dout.new(*dout.size()) 245 | target.data.uniform_(-0.1, 0.1) 246 | error = (target - dout).mean() 247 | error.backward() 248 | print(dout.shape) 249 | 250 | if __name__ == '__main__': 251 | 252 | example_dconv() 253 | example_dpooling() 254 | example_mdpooling() 255 | 256 | check_pooling_zero_offset() 257 | # zero offset check 258 | if inC == outC: 259 | check_zero_offset() 260 | 261 | check_gradient_dpooling() 262 | 263 | # # gradient check 264 | # try: 265 | # check_gradient_dconv_double() 266 | # except TypeError: 267 | # print('''****** You can switch to double precision in dcn_v2_func.py by (un)commenting these two lines: 268 | # ****** from _ext import dcn_v2 as _backend 269 | # ****** from _ext import dcn_v2_double as _backend''') 270 | # print('****** Your tensor may not be **double** type') 271 | # print('****** Switching to **float** type') 272 | # 273 | # check_gradient_dconv() 274 | # finally: 275 | # print('****** Note: the "backward is not reentrant" error may not be a serious problem, ' 276 | # '****** since the max error is less than 1e-7\n' 277 | # '****** Still looking for what triggers this problem') -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 2 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/__init__.py:
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | 6 | import top_pool, bottom_pool, left_pool, right_pool 7 | 8 | class TopPoolFunction(Function): 9 | @staticmethod 10 | def forward(ctx, input): 11 | output = top_pool.forward(input)[0] 12 | ctx.save_for_backward(input) 13 | return output 14 | 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | input = ctx.saved_variables[0] 18 | output = top_pool.backward(input, grad_output)[0] 19 | return output 20 | 21 | class BottomPoolFunction(Function): 22 | @staticmethod 23 | def forward(ctx, input): 24 | output = bottom_pool.forward(input)[0] 25 | ctx.save_for_backward(input) 26 | return output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | input = ctx.saved_variables[0] 31 | output = bottom_pool.backward(input, grad_output)[0] 32 | return output 33 | 34 | class LeftPoolFunction(Function): 35 | @staticmethod 36 | def forward(ctx, input): 37 | output = left_pool.forward(input)[0] 38 | ctx.save_for_backward(input) 39 | return output 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | input = ctx.saved_variables[0] 44 | output = left_pool.backward(input, grad_output)[0] 45 | return output 46 | 47 | class RightPoolFunction(Function): 48 | @staticmethod 49 | def forward(ctx, input): 50 | output = right_pool.forward(input)[0] 51 | ctx.save_for_backward(input) 52 | return output 53 | 54 | @staticmethod 55 | def backward(ctx, grad_output): 56 | input = ctx.saved_variables[0] 57 | output = right_pool.backward(input, grad_output)[0] 58 | return output 59 | 60 | class TopPool(nn.Module): 61 | def forward(self, x): 62 | return TopPoolFunction.apply(x) 63 | 64 | class BottomPool(nn.Module): 65 | def forward(self, x): 66 | return BottomPoolFunction.apply(x) 67 | 68 | class LeftPool(nn.Module): 69 | def forward(self, x): 70 | return LeftPoolFunction.apply(x) 71 | 72 | class RightPool(nn.Module): 73 | def forward(self, x): 74 | return RightPoolFunction.apply(x) 75 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, ind, height); 18 | at::Tensor cur_temp = at::slice(output, 2, ind, height); 19 | at::Tensor next_temp = at::slice(output, 2, 0, height-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 |
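// The doubling stride above (ind <<= 1) computes a prefix max along the
// height axis: after ceil(log2(height)) passes each row of `output` holds
// the max over itself and every row above it, replacing an O(height)
// row-by-row scan with O(log height) slice-wide max operations.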
return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(2, 0); 48 | auto grad_output_temp = grad_output.select(2, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < height - 1; ++ind) { 55 | input_temp = input.select(2, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Bottom Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Bottom Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, 0, width-ind); 18 | at::Tensor cur_temp = at::slice(output, 3, 0, width-ind); 19 | at::Tensor next_temp = at::slice(output, 3, ind, width); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, width - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(width - 1); 46 | 47 | auto output_temp = output.select(3, width - 1); 48 | auto grad_output_temp = grad_output.select(3, width - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto
max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < width; ++ind) { 55 | input_temp = input.select(3, width - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, width - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Left Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Left Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/right_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, ind, width); 18 | at::Tensor cur_temp = at::slice(output, 3, ind, width); 19 | at::Tensor next_temp = at::slice(output, 3, 0, width-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | at::Tensor output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(3, 0); 48 | auto grad_output_temp = grad_output.select(3, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < width - 1; ++ind) { 55 | input_temp = input.select(3, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Right Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Right Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | --------------------------------------------------------------------------------
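All four pooling kernels share this same forward trick; only the axis and scan direction differ. As an illustrative cross-check (our own sketch, not a file in this repo; `bottom_pool_ref` is a hypothetical name), the `pool_forward` loop of `bottom_pool.cpp` can be reproduced in pure PyTorch and compared against `cummax`, which requires a newer PyTorch than the 1.0.1 these extensions target:

```Python
import torch

def bottom_pool_ref(x):
    # Mirrors bottom_pool.cpp's pool_forward: a prefix max down the height
    # axis (dim 2) built from doubling shifts, i.e. O(log H) passes.
    out = x.clone()
    height = out.size(2)
    ind = 1
    while ind < height:
        # clone() avoids read/write aliasing between the two overlapping slices
        out[:, :, ind:] = torch.max(out[:, :, ind:], out[:, :, :height - ind].clone())
        ind <<= 1
    return out

x = torch.randn(2, 3, 8, 8)
assert torch.equal(bottom_pool_ref(x), x.cummax(dim=2).values)
```

The backward kernels cannot use the doubling trick: they walk the axis one step at a time because each position's gradient must be routed back to the argmax location recorded in `max_ind`.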
/src/lib/models/networks/py_utils/_cpools/src/top_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> top_pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, 0, height-ind); 18 | at::Tensor cur_temp = at::slice(output, 2, 0, height-ind); 19 | at::Tensor next_temp = at::slice(output, 2, ind, height); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> top_pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, height - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(height - 1); 46 | 47 | auto output_temp = output.select(2, height - 1); 48 | auto grad_output_temp = grad_output.select(2, height - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < height; ++ind) { 55 | input_temp = input.select(2, height - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, height - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &top_pool_forward, "Top Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &top_pool_backward, "Top Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | from .scatter_gather import scatter_kwargs 8 | 9 | class DataParallel(Module): 10 | r"""Implements data parallelism at the module level. 11 | 12 | This container parallelizes the application of the given module by 13 | splitting the input across the specified devices by chunking in the batch 14 | dimension. In the forward pass, the module is replicated on each device, 15 | and each replica handles a portion of the input. During the backwards 16 | pass, gradients from each replica are summed into the original module.
17 | 18 | The batch size should be larger than the number of GPUs used. It should 19 | also be an integer multiple of the number of GPUs so that each chunk is the 20 | same size (so that each GPU processes the same number of samples). 21 | 22 | See also: :ref:`cuda-nn-dataparallel-instead` 23 | 24 | Arbitrary positional and keyword inputs are allowed to be passed into 25 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 26 | specified (default 0). Primitive types will be broadcasted, but all 27 | other types will be a shallow copy and can be corrupted if written to in 28 | the model's forward pass. 29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import _tranpose_and_gather_feat 5 | 6 | def _sigmoid(x): 7 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 8 | 9 | def _ae_loss(tag0, tag1, mask): 10 | num = mask.sum(dim=1, keepdim=True).float() 11 | tag0 = tag0.squeeze() 12 | tag1 = tag1.squeeze() 13 | 14 | tag_mean = (tag0 + tag1) / 2 15 | 16 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 17 | tag0 = tag0[mask].sum() 18 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 19 | tag1 = tag1[mask].sum() 20 | pull = tag0 + tag1 21 | 22 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 23 | mask = mask.eq(2) 24 | num = num.unsqueeze(2) 25 | num2 = (num - 1) * num 26 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 27 | dist = 1 - torch.abs(dist) 28 | dist = nn.functional.relu(dist, inplace=True) 29 | dist = dist - 1 / (num + 1e-4) 30 | dist = dist / (num2 + 1e-4) 31 | dist = dist[mask] 32 | push = dist.sum() 33 | return pull, push 34 | 35 | def _off_loss(off, gt_off, mask): 36 | num = mask.float().sum() 37 | mask = mask.unsqueeze(2).expand_as(gt_off) 38 | 39 | off = off[mask] 40 | gt_off = gt_off[mask] 41 | 42 | off_loss = nn.functional.smooth_l1_loss(off, gt_off, reduction="sum") 43 | off_loss = off_loss / (num + 1e-4) 44 | return off_loss 45 | 46 | def _focal_loss_mask(preds, gt, mask): 47 | pos_inds = gt.eq(1) 48 | neg_inds = gt.lt(1) 49 | 50 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 51 | 52 | pos_mask = mask[pos_inds] 53 | neg_mask = mask[neg_inds] 54 | 55 | loss = 0 56 | for pred in preds: 57 | pos_pred = pred[pos_inds] 58 | neg_pred = pred[neg_inds] 59 | 60 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * pos_mask 61 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights * neg_mask 62 | 63 | num_pos = pos_inds.float().sum() 64 | pos_loss = pos_loss.sum() 65 | neg_loss = neg_loss.sum() 66 | 67 | if pos_pred.nelement() == 0: 68 | loss = loss - neg_loss 69 | else: 70 | loss = loss - (pos_loss + neg_loss) / num_pos 71 | return loss 72 | 73 | def _focal_loss(preds, gt): 74 | pos_inds = gt.eq(1) 75 | neg_inds = gt.lt(1) 76 | 77 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 78 | 79 | loss = 0 80 | for pred in preds: 81 | pos_pred = pred[pos_inds] 82 | neg_pred = pred[neg_inds] 83 | 84 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 85 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 86 | 87 | num_pos = pos_inds.float().sum() 88 | pos_loss = pos_loss.sum() 89 | neg_loss = neg_loss.sum() 90 | 91 | if 
pos_pred.nelement() == 0: 92 | loss = loss - neg_loss 93 | else: 94 | loss = loss - (pos_loss + neg_loss) / num_pos 95 | return loss 96 | 97 | class CornerNet_Saccade_Loss(nn.Module): 98 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss_mask): 99 | super(CornerNet_Saccade_Loss, self).__init__() 100 | 101 | self.pull_weight = pull_weight 102 | self.push_weight = push_weight 103 | self.off_weight = off_weight 104 | self.focal_loss = focal_loss 105 | self.ae_loss = _ae_loss 106 | self.off_loss = _off_loss 107 | 108 | def forward(self, outs, targets): 109 | tl_heats = outs[0] 110 | br_heats = outs[1] 111 | tl_tags = outs[2] 112 | br_tags = outs[3] 113 | tl_offs = outs[4] 114 | br_offs = outs[5] 115 | atts = outs[6] 116 | 117 | gt_tl_heat = targets[0] 118 | gt_br_heat = targets[1] 119 | gt_mask = targets[2] 120 | gt_tl_off = targets[3] 121 | gt_br_off = targets[4] 122 | gt_tl_ind = targets[5] 123 | gt_br_ind = targets[6] 124 | gt_tl_valid = targets[7] 125 | gt_br_valid = targets[8] 126 | gt_atts = targets[9] 127 | 128 | # focal loss 129 | focal_loss = 0 130 | 131 | tl_heats = [_sigmoid(t) for t in tl_heats] 132 | br_heats = [_sigmoid(b) for b in br_heats] 133 | 134 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat, gt_tl_valid) 135 | focal_loss += self.focal_loss(br_heats, gt_br_heat, gt_br_valid) 136 | 137 | atts = [[_sigmoid(a) for a in att] for att in atts] 138 | atts = [[att[ind] for att in atts] for ind in range(len(gt_atts))] 139 | 140 | att_loss = 0 141 | for att, gt_att in zip(atts, gt_atts): 142 | att_loss += _focal_loss(att, gt_att) / max(len(att), 1) 143 | 144 | # tag loss 145 | pull_loss = 0 146 | push_loss = 0 147 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 148 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 149 | for tl_tag, br_tag in zip(tl_tags, br_tags): 150 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 151 | pull_loss += pull 152 | push_loss += push 153 | pull_loss = self.pull_weight * pull_loss 154 | push_loss = self.push_weight * push_loss 155 | 156 | off_loss = 0 157 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 158 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 159 | for tl_off, br_off in zip(tl_offs, br_offs): 160 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 161 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 162 | off_loss = self.off_weight * off_loss 163 | 164 | loss = (focal_loss + att_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 165 | return loss.unsqueeze(0) 166 | 167 | class CornerNet_Loss(nn.Module): 168 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss): 169 | super(CornerNet_Loss, self).__init__() 170 | 171 | self.pull_weight = pull_weight 172 | self.push_weight = push_weight 173 | self.off_weight = off_weight 174 | self.focal_loss = focal_loss 175 | self.ae_loss = _ae_loss 176 | self.off_loss = _off_loss 177 | 178 | def forward(self, outs, targets): 179 | tl_heats = outs[0] 180 | br_heats = outs[1] 181 | tl_tags = outs[2] 182 | br_tags = outs[3] 183 | tl_offs = outs[4] 184 | br_offs = outs[5] 185 | 186 | gt_tl_heat = targets[0] 187 | gt_br_heat = targets[1] 188 | gt_mask = targets[2] 189 | gt_tl_off = targets[3] 190 | gt_br_off = targets[4] 191 | gt_tl_ind = targets[5] 192 | gt_br_ind = targets[6] 193 | 194 | # focal loss 195 | focal_loss = 0 196 | 197 | tl_heats = [_sigmoid(t) for t in tl_heats] 198 | 
br_heats = [_sigmoid(b) for b in br_heats] 199 | 200 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 201 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 202 | 203 | # tag loss 204 | pull_loss = 0 205 | push_loss = 0 206 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 207 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 208 | for tl_tag, br_tag in zip(tl_tags, br_tags): 209 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 210 | pull_loss += pull 211 | push_loss += push 212 | pull_loss = self.pull_weight * pull_loss 213 | push_loss = self.push_weight * push_loss 214 | 215 | off_loss = 0 216 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 217 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 218 | for tl_off, br_off in zip(tl_offs, br_offs): 219 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 220 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 221 | off_loss = self.off_weight * off_loss 222 | 223 | loss = (focal_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 224 | return loss.unsqueeze(0) 225 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
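        # Recurse over containers: tuples/lists/dicts are taken apart, each
        # member is scattered across target_gpus, and one container per GPU is
        # rebuilt from the pieces; any other object falls through below and is
        # repeated by reference for every GPU.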
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def _gather_feat(feat, ind, mask=None): 5 | dim = feat.size(2) 6 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 7 | feat = feat.gather(1, ind) 8 | if mask is not None: 9 | mask = mask.unsqueeze(2).expand_as(feat) 10 | feat = feat[mask] 11 | feat = feat.view(-1, dim) 12 | return feat 13 | 14 | def _nms(heat, kernel=1): 15 | pad = (kernel - 1) // 2 16 | 17 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 18 | keep = (hmax == heat).float() 19 | return heat * keep 20 | 21 | def _tranpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def _topk(scores, K=20): 28 | batch, cat, height, width = scores.size() 29 | 30 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 31 | 32 | topk_clses = (topk_inds / (height * width)).int() 33 | 34 | topk_inds = topk_inds % (height * width) 35 | topk_ys = (topk_inds / width).int().float() 36 | topk_xs = (topk_inds % width).int().float() 37 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 38 | 39 | def _decode( 40 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, 41 | K=100, kernel=1, ae_threshold=1, num_dets=1000, no_border=False 42 | ): 43 | batch, cat, height, width = tl_heat.size() 44 | 45 | tl_heat = torch.sigmoid(tl_heat) 46 | br_heat = torch.sigmoid(br_heat) 47 | 48 | # perform nms on heatmaps 49 | tl_heat = _nms(tl_heat, kernel=kernel) 50 | br_heat = _nms(br_heat, kernel=kernel) 51 | 52 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 53 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 54 | 55 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 56 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 57 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 58 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 59 | 60 | if no_border: 61 | tl_ys_binds = (tl_ys == 0) 62 | tl_xs_binds = (tl_xs == 0) 63 | br_ys_binds = (br_ys == height - 1) 64 | br_xs_binds = (br_xs == width - 1) 65 | 66 | if tl_regr is not None and br_regr is not None: 67 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 68 | tl_regr = tl_regr.view(batch, K, 1, 2) 69 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 70 | br_regr = 
br_regr.view(batch, 1, K, 2) 71 | 72 | tl_xs = tl_xs + tl_regr[..., 0] 73 | tl_ys = tl_ys + tl_regr[..., 1] 74 | br_xs = br_xs + br_regr[..., 0] 75 | br_ys = br_ys + br_regr[..., 1] 76 | 77 | # all possible boxes based on top k corners (ignoring class) 78 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 79 | 80 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 81 | tl_tag = tl_tag.view(batch, K, 1) 82 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 83 | br_tag = br_tag.view(batch, 1, K) 84 | dists = torch.abs(tl_tag - br_tag) 85 | 86 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 87 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 88 | scores = (tl_scores + br_scores) / 2 89 | 90 | # reject boxes based on classes 91 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 92 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 93 | cls_inds = (tl_clses != br_clses) 94 | 95 | # reject boxes based on distances 96 | dist_inds = (dists > ae_threshold) 97 | 98 | # reject boxes based on widths and heights 99 | width_inds = (br_xs < tl_xs) 100 | height_inds = (br_ys < tl_ys) 101 | 102 | if no_border: 103 | scores[tl_ys_binds] = -1 104 | scores[tl_xs_binds] = -1 105 | scores[br_ys_binds] = -1 106 | scores[br_xs_binds] = -1 107 | 108 | scores[cls_inds] = -1 109 | scores[dist_inds] = -1 110 | scores[width_inds] = -1 111 | scores[height_inds] = -1 112 | 113 | scores = scores.view(batch, -1) 114 | scores, inds = torch.topk(scores, num_dets) 115 | scores = scores.unsqueeze(2) 116 | 117 | bboxes = bboxes.view(batch, -1, 4) 118 | bboxes = _gather_feat(bboxes, inds) 119 | 120 | clses = tl_clses.contiguous().view(batch, -1, 1) 121 | clses = _gather_feat(clses, inds).float() 122 | 123 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 124 | tl_scores = _gather_feat(tl_scores, inds).float() 125 | br_scores = br_scores.contiguous().view(batch, -1, 1) 126 | br_scores = _gather_feat(br_scores, inds).float() 127 | 128 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 129 | return detections 130 | 131 | class upsample(nn.Module): 132 | def __init__(self, scale_factor): 133 | super(upsample, self).__init__() 134 | self.scale_factor = scale_factor 135 | 136 | def forward(self, x): 137 | return nn.functional.interpolate(x, scale_factor=self.scale_factor) 138 | 139 | class merge(nn.Module): 140 | def forward(self, x, y): 141 | return x + y 142 | 143 | class convolution(nn.Module): 144 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 145 | super(convolution, self).__init__() 146 | 147 | pad = (k - 1) // 2 148 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 149 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 150 | self.relu = nn.ReLU(inplace=True) 151 | 152 | def forward(self, x): 153 | conv = self.conv(x) 154 | bn = self.bn(conv) 155 | relu = self.relu(bn) 156 | return relu 157 | 158 | class residual(nn.Module): 159 | def __init__(self, inp_dim, out_dim, k=3, stride=1): 160 | super(residual, self).__init__() 161 | p = (k - 1) // 2 162 | 163 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(p, p), stride=(stride, stride), bias=False) 164 | self.bn1 = nn.BatchNorm2d(out_dim) 165 | self.relu1 = nn.ReLU(inplace=True) 166 | 167 | self.conv2 = nn.Conv2d(out_dim, out_dim, (k, k), padding=(p, p), bias=False) 168 | self.bn2 = nn.BatchNorm2d(out_dim) 169 | 170 | self.skip = nn.Sequential( 171 | nn.Conv2d(inp_dim, out_dim, (1, 
1), stride=(stride, stride), bias=False), 172 | nn.BatchNorm2d(out_dim) 173 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 174 | self.relu = nn.ReLU(inplace=True) 175 | 176 | def forward(self, x): 177 | conv1 = self.conv1(x) 178 | bn1 = self.bn1(conv1) 179 | relu1 = self.relu1(bn1) 180 | 181 | conv2 = self.conv2(relu1) 182 | bn2 = self.bn2(conv2) 183 | 184 | skip = self.skip(x) 185 | return self.relu(bn2 + skip) 186 | 187 | class corner_pool(nn.Module): 188 | def __init__(self, dim, pool1, pool2): 189 | super(corner_pool, self).__init__() 190 | self._init_layers(dim, pool1, pool2) 191 | 192 | def _init_layers(self, dim, pool1, pool2): 193 | self.p1_conv1 = convolution(3, dim, 128) 194 | self.p2_conv1 = convolution(3, dim, 128) 195 | 196 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 197 | self.p_bn1 = nn.BatchNorm2d(dim) 198 | 199 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 200 | self.bn1 = nn.BatchNorm2d(dim) 201 | self.relu1 = nn.ReLU(inplace=True) 202 | 203 | self.conv2 = convolution(3, dim, dim) 204 | 205 | self.pool1 = pool1() 206 | self.pool2 = pool2() 207 | 208 | def forward(self, x): 209 | # pool 1 210 | p1_conv1 = self.p1_conv1(x) 211 | pool1 = self.pool1(p1_conv1) 212 | 213 | # pool 2 214 | p2_conv1 = self.p2_conv1(x) 215 | pool2 = self.pool2(p2_conv1) 216 | 217 | # pool 1 + pool 2 218 | p_conv1 = self.p_conv1(pool1 + pool2) 219 | p_bn1 = self.p_bn1(p_conv1) 220 | 221 | conv1 = self.conv1(x) 222 | bn1 = self.bn1(conv1) 223 | relu1 = self.relu1(p_bn1 + bn1) 224 | 225 | conv2 = self.conv2(relu1) 226 | return conv2 227 | -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | 24 | class BaseTrainer(object): 25 | def __init__( 26 | self, opt, model, optimizer=None): 27 | self.opt = opt 28 | self.optimizer = optimizer 29 | self.loss_stats, self.loss = self._get_losses(opt) 30 | self.model_with_loss = ModelWithLoss(model, self.loss) 31 | 32 | def set_device(self, gpus, chunk_sizes, device): 33 | if len(gpus) > 1: 34 | self.model_with_loss = DataParallel( 35 | self.model_with_loss, device_ids=gpus, 36 | chunk_sizes=chunk_sizes).to(device) 37 | else: 38 | self.model_with_loss = self.model_with_loss.to(device) 39 | 40 | for state in self.optimizer.state.values(): 41 | for k, v in state.items(): 42 | if isinstance(v, torch.Tensor): 43 | state[k] = v.to(device=device, non_blocking=True) 44 | 45 | def run_epoch(self, phase, epoch, data_loader): 46 | model_with_loss = self.model_with_loss 47 | if phase == 'train': 48 | model_with_loss.train() 49 | else: 50 | if len(self.opt.gpus) > 1: 51 | model_with_loss = self.model_with_loss.module 52 | model_with_loss.eval() 53 | torch.cuda.empty_cache() 54 | 55 | opt = self.opt 56 | results = {} 57 | data_time, batch_time = AverageMeter(), AverageMeter() 58 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 59 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 60 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 61 | end = time.time() 62 | for iter_id, batch in enumerate(data_loader): 63 | if iter_id >= num_iters: 64 | break 65 | data_time.update(time.time() - end) 66 | 67 | for k in batch: 68 | if k != 'meta': 69 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 70 | output, loss, loss_stats = model_with_loss(batch) 71 | loss = loss.mean() 72 | if phase == 'train': 73 | self.optimizer.zero_grad() 74 | loss.backward() 75 | self.optimizer.step() 76 | batch_time.update(time.time() - end) 77 | end = time.time() 78 | 79 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 80 | epoch, iter_id, num_iters, phase=phase, 81 | total=bar.elapsed_td, eta=bar.eta_td) 82 | for l in avg_loss_stats: 83 | avg_loss_stats[l].update( 84 | loss_stats[l].mean().item(), batch['input'].size(0)) 85 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 86 | if not opt.hide_data_time: 87 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 88 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 89 | if opt.print_iter > 0: 90 | if iter_id % opt.print_iter == 0: 91 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 92 | else: 93 | bar.next() 94 | 95 | if opt.debug > 0: 96 |
self.debug(batch, output, iter_id) 97 | 98 | if opt.test: 99 | self.save_result(output, batch, results) 100 | del output, loss, loss_stats 101 | 102 | bar.finish() 103 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 104 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 105 | return ret, results 106 | 107 | def debug(self, batch, output, iter_id): 108 | raise NotImplementedError 109 | 110 | def save_result(self, output, batch, results): 111 | raise NotImplementedError 112 | 113 | def _get_losses(self, opt): 114 | raise NotImplementedError 115 | 116 | def val(self, epoch, data_loader): 117 | return self.run_epoch('val', epoch, data_loader) 118 | 119 | def train(self, epoch, data_loader): 120 | return self.run_epoch('train', epoch, data_loader) 121 | -------------------------------------------------------------------------------- /src/lib/trains/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss 9 | from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 10 | from models.decode import ctdet_decode 11 | from models.utils import _sigmoid 12 | from utils.debugger import Debugger 13 | from utils.post_process import ctdet_post_process 14 | from utils.oracle_utils import gen_oracle_map 15 | from .base_trainer import BaseTrainer 16 | 17 | 18 | class CtdetLoss(torch.nn.Module): 19 | def __init__(self, opt): 20 | super(CtdetLoss, self).__init__() 21 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 22 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 23 | RegLoss() if opt.reg_loss == 'sl1' else None 24 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \ 25 | NormRegL1Loss() if opt.norm_wh else \ 26 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg 27 | self.opt = opt 28 | 29 | def forward(self, outputs, batch): 30 | opt = self.opt 31 | hm_act_loss, wh_act_loss = 0, 0 32 | for s in range(opt.num_stacks): 33 | output = outputs[s] 34 | if not opt.mse_loss: 35 | output['hm_act_f'] = _sigmoid(output['hm_act_f']) 36 | hm_act_loss += self.crit(output['hm_act_f'], batch['hm_act']) / opt.num_stacks 37 | wh_act_loss += self.crit_reg(output['wh_act'], batch['reg_act_mask'], 38 | batch['ind_act'], batch['wh_act']) / opt.num_stacks 39 | loss = opt.hm_act_weight * hm_act_loss + opt.wh_weight * wh_act_loss 40 | loss_stats = {'loss': loss, 'hm_act_loss': hm_act_loss, 'wh_act_loss': wh_act_loss} 41 | return loss, loss_stats 42 | 43 | 44 | class CtdetTrainer(BaseTrainer): 45 | def __init__(self, opt, model, optimizer=None): 46 | super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer) 47 | 48 | def _get_losses(self, opt): 49 | loss_states = ['loss', 'hm_act_loss', 'wh_act_loss'] 50 | loss = CtdetLoss(opt) 51 | return loss_states, loss 52 | 53 | def debug(self, batch, output, iter_id): 54 | opt = self.opt 55 | reg = output['reg'] if opt.reg_offset else None 56 | dets = ctdet_decode( 57 | output['hm'], output['wh'], reg=reg, 58 | cat_spec_wh=opt.cat_spec_wh, K=opt.K) 59 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 60 | dets[:, :, :4] *= opt.down_ratio 61 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 62 | dets_gt[:, :, :4] *= opt.down_ratio 63 | for i in range(1): 64 | debugger = Debugger( 65 | dataset=opt.dataset, 
ipynb=(opt.debug==3), theme=opt.debugger_theme) 66 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 67 | img = np.clip((( 68 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 69 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 70 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 71 | debugger.add_blend_img(img, pred, 'pred_hm') 72 | debugger.add_blend_img(img, gt, 'gt_hm') 73 | debugger.add_img(img, img_id='out_pred') 74 | for k in range(len(dets[i])): 75 | if dets[i, k, 4] > opt.center_thresh: 76 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 77 | dets[i, k, 4], img_id='out_pred') 78 | 79 | debugger.add_img(img, img_id='out_gt') 80 | for k in range(len(dets_gt[i])): 81 | if dets_gt[i, k, 4] > opt.center_thresh: 82 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 83 | dets_gt[i, k, 4], img_id='out_gt') 84 | 85 | if opt.debug == 4: 86 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 87 | else: 88 | debugger.show_all_imgs(pause=True) 89 | 90 | def save_result(self, output, batch, results): 91 | reg = output['reg'] if self.opt.reg_offset else None 92 | dets = ctdet_decode( 93 | output['hm'], output['wh'], reg=reg, 94 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 95 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 96 | dets_out = ctdet_post_process( 97 | dets.copy(), batch['meta']['c'].cpu().numpy(), 98 | batch['meta']['s'].cpu().numpy(), 99 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 100 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 101 | -------------------------------------------------------------------------------- /src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetTrainer 6 | 7 | train_factory = { 8 | 'ctdet': CtdetTrainer} 9 | 10 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def compute_box_3d(dim, location, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners) 22 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) 23 | return corners_3d.transpose(1, 0) 24 | 25 | def project_to_image(pts_3d, P): 26 | # pts_3d: n x 3 27 | # P: 3 x 4 28 | # return: n x 2 29 | pts_3d_homo = np.concatenate( 30 | 
[pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 31 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 32 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 33 | # import pdb; pdb.set_trace() 34 | return pts_2d 35 | 36 | def compute_orientation_3d(dim, location, rotation_y): 37 | # dim: 3 38 | # location: 3 39 | # rotation_y: 1 40 | # return: 2 x 3 41 | c, s = np.cos(rotation_y), np.sin(rotation_y) 42 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 43 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 44 | orientation_3d = np.dot(R, orientation_3d) 45 | orientation_3d = orientation_3d + \ 46 | np.array(location, dtype=np.float32).reshape(3, 1) 47 | return orientation_3d.transpose(1, 0) 48 | 49 | def draw_box_3d(image, corners, c=(0, 0, 255)): 50 | face_idx = [[0,1,5,4], 51 | [1,2,6, 5], 52 | [2,3,7,6], 53 | [3,0,4,7]] 54 | for ind_f in range(3, -1, -1): 55 | f = face_idx[ind_f] 56 | for j in range(4): 57 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 58 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA) 59 | if ind_f == 0: 60 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 61 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 62 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 63 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 64 | return image 65 | 66 | def unproject_2d_to_3d(pt_2d, depth, P): 67 | # pts_2d: 2 68 | # depth: 1 69 | # P: 3 x 4 70 | # return: 3 71 | z = depth - P[2, 3] 72 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 73 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 74 | pt_3d = np.array([x, y, z], dtype=np.float32) 75 | return pt_3d 76 | 77 | def alpha2rot_y(alpha, x, cx, fx): 78 | """ 79 | Get rotation_y by alpha + theta - 180 80 | alpha : Observation angle of object, ranging [-pi..pi] 81 | x : Object center x to the camera center (x-W/2), in pixels 82 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 83 | """ 84 | rot_y = alpha + np.arctan2(x - cx, fx) 85 | if rot_y > np.pi: 86 | rot_y -= 2 * np.pi 87 | if rot_y < -np.pi: 88 | rot_y += 2 * np.pi 89 | return rot_y 90 | 91 | def rot_y2alpha(rot_y, x, cx, fx): 92 | """ 93 | Get rotation_y by alpha + theta - 180 94 | alpha : Observation angle of object, ranging [-pi..pi] 95 | x : Object center x to the camera center (x-W/2), in pixels 96 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 97 | """ 98 | alpha = rot_y - np.arctan2(x - cx, fx) 99 | if alpha > np.pi: 100 | alpha -= 2 * np.pi 101 | if alpha < -np.pi: 102 | alpha += 2 * np.pi 103 | return alpha 104 | 105 | 106 | def ddd2locrot(center, alpha, dim, depth, calib): 107 | # single image 108 | locations = unproject_2d_to_3d(center, depth, calib) 109 | locations[1] += dim[0] / 2 110 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 111 | return locations, rotation_y 112 | 113 | def project_3d_bbox(location, dim, rotation_y, calib): 114 | box_3d = compute_box_3d(dim, location, rotation_y) 115 | box_2d = project_to_image(box_3d, calib) 116 | return box_2d 117 | 118 | 119 | if __name__ == '__main__': 120 | calib = np.array( 121 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 122 | [0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 123 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 124 | dtype=np.float32) 125 | alpha = -0.20 
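    # Round-trip check with a sample camera matrix and 2D box: alpha2rot_y
    # should recover (approximately) the rotation_y printed next to it.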
126 | tl = np.array([712.40, 143.00], dtype=np.float32) 127 | br = np.array([810.73, 307.92], dtype=np.float32) 128 | ct = (tl + br) / 2 129 | rotation_y = 0.01 130 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 131 | print('rotation_y', rotation_y) -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | 17 | def flip(img): 18 | return img[:, :, ::-1].copy() 19 | 20 | 21 | def transform_preds(coords, center, scale, output_size): 22 | target_coords = np.zeros(coords.shape) 23 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 24 | for p in range(coords.shape[0]): 25 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 26 | return target_coords 27 | 28 | 29 | def get_affine_transform(center, 30 | scale, 31 | rot, 32 | output_size, 33 | shift=np.array([0, 0], dtype=np.float32), 34 | inv=0): 35 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 36 | scale = np.array([scale, scale], dtype=np.float32) 37 | 38 | scale_tmp = scale 39 | src_w = scale_tmp[0] 40 | dst_w = output_size[0] 41 | dst_h = output_size[1] 42 | 43 | rot_rad = np.pi * rot / 180 44 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 45 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 46 | 47 | src = np.zeros((3, 2), dtype=np.float32) 48 | dst = np.zeros((3, 2), dtype=np.float32) 49 | src[0, :] = center + scale_tmp * shift 50 | src[1, :] = center + src_dir + scale_tmp * shift 51 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 52 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 53 | 54 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 55 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 56 | 57 | if inv: 58 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 59 | else: 60 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 61 | 62 | return trans 63 | 64 | 65 | def affine_transform(pt, t): 66 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 67 | new_pt = np.dot(t, new_pt) 68 | return new_pt[:2] 69 | 70 | 71 | def get_3rd_point(a, b): 72 | direct = a - b 73 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 74 | 75 | 76 | def get_dir(src_point, rot_rad): 77 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 78 | 79 | src_result = [0, 0] 80 | src_result[0] = src_point[0] * cs - src_point[1] * sn 81 | src_result[1] = src_point[0] * sn + src_point[1] * cs 82 | 83 | return src_result 84 | 85 | 86 | def crop(img, center, scale, output_size, rot=0): 87 | trans = get_affine_transform(center, scale, rot, output_size) 88 | 89 | dst_img = cv2.warpAffine(img, 90 | trans, 91 | (int(output_size[0]), int(output_size[1])), 92 | flags=cv2.INTER_LINEAR) 93 | 94 | return dst_img 95 | 96 | 97 | def gaussian_radius(det_size, min_overlap=0.7): 98 | height, width = det_size 99 | 100 | a1 = 1 101 | b1 = (height + width) 102 | c1 = width * height * (1 - min_overlap) / (1 + 
min_overlap) 103 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 104 | r1 = (b1 + sq1) / 2 105 | 106 | a2 = 4 107 | b2 = 2 * (height + width) 108 | c2 = (1 - min_overlap) * width * height 109 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 110 | r2 = (b2 + sq2) / 2 111 | 112 | a3 = 4 * min_overlap 113 | b3 = -2 * min_overlap * (height + width) 114 | c3 = (min_overlap - 1) * width * height 115 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 116 | r3 = (b3 + sq3) / 2 117 | return min(r1, r2, r3) 118 | 119 | 120 | def gaussian2D(shape, sigma=1): 121 | m, n = [(ss - 1.) / 2. for ss in shape] 122 | y, x = np.ogrid[-m:m+1,-n:n+1] 123 | 124 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 125 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 126 | return h 127 | 128 | 129 | def draw_umich_gaussian(heatmap, center, radius, k=1): 130 | diameter = 2 * radius + 1 131 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 132 | 133 | x, y = int(center[0]), int(center[1]) 134 | 135 | height, width = heatmap.shape[0:2] 136 | 137 | left, right = min(x, radius), min(width - x, radius + 1) 138 | top, bottom = min(y, radius), min(height - y, radius + 1) 139 | 140 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 141 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 142 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 143 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 144 | return heatmap 145 | 146 | 147 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 148 | diameter = 2 * radius + 1 149 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 150 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 151 | dim = value.shape[0] 152 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 153 | if is_offset and dim == 2: 154 | delta = np.arange(diameter*2+1) - radius 155 | reg[0] = reg[0] - delta.reshape(1, -1) 156 | reg[1] = reg[1] - delta.reshape(-1, 1) 157 | 158 | x, y = int(center[0]), int(center[1]) 159 | 160 | height, width = heatmap.shape[0:2] 161 | 162 | left, right = min(x, radius), min(width - x, radius + 1) 163 | top, bottom = min(y, radius), min(height - y, radius + 1) 164 | 165 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 166 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 167 | masked_gaussian = gaussian[radius - top:radius + bottom, 168 | radius - left:radius + right] 169 | masked_reg = reg[:, radius - top:radius + bottom, 170 | radius - left:radius + right] 171 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 172 | idx = (masked_gaussian >= masked_heatmap).reshape( 173 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 174 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 175 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 176 | return regmap 177 | 178 | 179 | def draw_msra_gaussian(heatmap, center, sigma): 180 | tmp_size = sigma * 3 181 | mu_x = int(center[0] + 0.5) 182 | mu_y = int(center[1] + 0.5) 183 | w, h = heatmap.shape[0], heatmap.shape[1] 184 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 185 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 186 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 187 | return heatmap 188 | size = 2 * tmp_size + 1 189 | x = np.arange(0, size, 1, np.float32) 190 | y = x[:, np.newaxis] 191 | x0 = y0 = size // 2 192 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / 
(2 * sigma ** 2)) 193 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 194 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 195 | img_x = max(0, ul[0]), min(br[0], h) 196 | img_y = max(0, ul[1]), min(br[1], w) 197 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 198 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 199 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 200 | return heatmap 201 | 202 | 203 | def grayscale(image): 204 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 205 | 206 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 207 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 208 | image += np.dot(eigvec, eigval * alpha) 209 | 210 | 211 | def blend_(alpha, image1, image2): 212 | image1 *= alpha 213 | image2 *= (1 - alpha) 214 | image1 += image2 215 | 216 | 217 | def saturation_(data_rng, image, gs, gs_mean, var): 218 | alpha = 1. + data_rng.uniform(low=-var, high=var) 219 | blend_(alpha, image, gs[:, :, None]) 220 | 221 | 222 | def brightness_(data_rng, image, gs, gs_mean, var): 223 | alpha = 1. + data_rng.uniform(low=-var, high=var) 224 | image *= alpha 225 | 226 | 227 | def contrast_(data_rng, image, gs, gs_mean, var): 228 | alpha = 1. + data_rng.uniform(low=-var, high=var) 229 | blend_(alpha, image, gs_mean) 230 | 231 | 232 | def color_aug(data_rng, image, eig_val, eig_vec): 233 | functions = [brightness_, contrast_, saturation_] 234 | random.shuffle(functions) 235 | 236 | gs = grayscale(image) 237 | gs_mean = gs.mean() 238 | for f in functions: 239 | f(data_rng, image, gs, gs_mean, 0.4) 240 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 241 | -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | 8 | 9 | def get_pred_depth(depth): 10 | 
return depth 11 | 12 | 13 | def get_alpha(rot): 14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 16 | # return rot[:, 0] 17 | idx = rot[:, 1] > rot[:, 5] 18 | alpha1 = np.arctan(rot[:, 2] / rot[:, 3]) + (-0.5 * np.pi) 19 | alpha2 = np.arctan(rot[:, 6] / rot[:, 7]) + ( 0.5 * np.pi) 20 | return alpha1 * idx + alpha2 * (1 - idx) 21 | 22 | 23 | def ctdet_post_process(dets_act, c, s, h, w, num_obj_classes, num_act_classes): 24 | ret_act = [] 25 | for i in range(dets_act.shape[0]): 26 | top_preds_act = {} 27 | dets_act[i, :, :2] = transform_preds( 28 | dets_act[i, :, 0:2], c[i], s[i], (w, h)) 29 | 30 | dets_act[i, :, 2:4] = transform_preds( 31 | dets_act[i, :, 2:4], c[i], s[i], (w, h)) 32 | 33 | dets_act[i, :, 4:6] = transform_preds( 34 | dets_act[i, :, 4:6], c[i], s[i], (w, h)) 35 | 36 | # print(dets_act[0]) 37 | 38 | classes_act = dets_act[i, :, -1] 39 | 40 | for j in range(num_act_classes): 41 | inds = (classes_act == j) 42 | top_preds_act[j + 1] = np.concatenate([ 43 | dets_act[i, inds, :6].astype(np.float32), 44 | dets_act[i, inds, 6:7].astype(np.float32)], axis=1).tolist() 45 | 46 | ret_act.append(top_preds_act) 47 | 48 | return ret_act 49 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/test_HOI.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import pickle 7 | import json 8 | import numpy as np 9 | import _init_paths 10 | 11 | from opts import opts 12 | from timer import Timer 13 | from logger import Logger 14 | from vsrl_eval import VCOCOeval 15 | from apply_prior import apply_prior 16 | from datasets.dataset_factory import dataset_factory 17 | from detectors.detector_factory import detector_factory 18 | 19 | 20 | def getSigmoid(sigmoid_coeff, x): 21 | a, b, c, d = sigmoid_coeff 22 | e = 2.718281828459 23 | return a / (1 + e**(b - c * x)) + d 24 | 25 | 26 | def dis(A, B): 27 | distance = np.sqrt(np.sum(np.square(A - B))) 28 | return distance 29 | 30 | 31 | def iou(box1, box2): 32 | area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) 33 | area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) 34 | inter = max(min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1, 0) * \ 35 | max(min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1, 0) 36 | iou = 1.0 * inter / (area1 + area2 - inter) 37 | return iou 38 | 39 | 40 | def test(opt, Test_RCNN, prior_mask, Action_dic_inv, output_file, human_thres, object_thres, action_thres, detection): 41 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 42 | 43 | Dataset = dataset_factory[opt.dataset] 44 | opt = 
opts().update_dataset_info_and_set_heads(opt, Dataset) 45 | print(opt) 46 | Logger(opt) 47 | Detector = detector_factory[opt.task] 48 | detector = Detector(opt) 49 | dataset = Dataset(opt, 'test') 50 | num_iters = len(dataset) 51 | 52 | count = 0 53 | wo_object_list = [4, 18, 23, 28] 54 | total_list = [i for i in range(1, 30)] 55 | sigmoid_coeff = (6, 6, 7, 0) 56 | h_dis_thresh = 10 57 | ho_dis_thresh = 80 58 | 59 | _t = {'im_detect': Timer(), 'misc': Timer()} 60 | 61 | for ind in range(num_iters): 62 | _t['im_detect'].tic() 63 | 64 | img_id = dataset.images[ind] 65 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 66 | img_path = os.path.join('coco/images/trainval2017/', img_info['file_name']) 67 | 68 | ret = detector.run(img_path) 69 | 70 | for H_ins in Test_RCNN[img_id]: 71 | if (np.max(H_ins[5]) > human_thres) and (H_ins[1] == 'Human'): # This is a valid human 72 | h_box = H_ins[2] 73 | h_c_x, h_c_y = (h_box[0] + h_box[2]) / 2, (h_box[1] + h_box[3]) / 2 74 | h_center = np.array([h_c_x, h_c_y]) # obtain the human center 75 | 76 | # Predict action without corresponding objects 77 | prediction_H = np.zeros(29) 78 | for i in wo_object_list: 79 | if len(ret[0][i]) != 0: 80 | for a in ret[0][i]: 81 | prediction_H[i-1] = a[6] if a[6] > action_thres and dis(a[0:2], h_center) < h_dis_thresh else 0 82 | 83 | # save image information 84 | dic = {} 85 | dic['image_id'] = img_id 86 | dic['person_box'] = H_ins[2] 87 | 88 | h_score = getSigmoid(sigmoid_coeff, H_ins[5]) 89 | 90 | # Predict actions between human and objects 91 | Score_obj = np.empty((0, 4 + 29), dtype=np.float32) 92 | 93 | for O_ins in Test_RCNN[img_id]: 94 | if (np.max(O_ins[5]) > object_thres) and (O_ins[1] == 'Object'): # This is a valid object 95 | o_box = O_ins[2] 96 | prediction_HO = np.zeros(29) 97 | o_score = getSigmoid(sigmoid_coeff, O_ins[5]) 98 | o_c_x, o_c_y = (o_box[0] + o_box[2]) / 2, (o_box[1] + o_box[3]) / 2 99 | 100 | for j in total_list: 101 | if j not in wo_object_list: 102 | if len(ret[0][j]) != 0: 103 | for a in ret[0][j]: 104 | iou_ao = iou(a[2:6], np.array(O_ins[2])) 105 | iou_ah = iou(a[2:6], np.array(H_ins[2])) 106 | if a[6] > action_thres and iou_ao > 0 and iou_ah > 0: 107 | 108 | ref_box = np.array([min(h_c_x, o_c_x), min(h_c_y, o_c_y), 109 | min(h_c_x, o_c_x), max(h_c_y, o_c_y), 110 | max(h_c_x, o_c_x), min(h_c_y, o_c_y), 111 | max(h_c_x, o_c_x), max(h_c_y, o_c_y)]) 112 | 113 | inter_box = np.array([a[2], a[3], a[2], a[5], a[4], a[3], a[4], a[5]]) 114 | 115 | dist_tl = dis(ref_box[0:2], inter_box[0:2]) 116 | dist_tr = dis(ref_box[2:4], inter_box[2:4]) 117 | dist_bl = dis(ref_box[4:6], inter_box[4:6]) 118 | dist_br = dis(ref_box[6:8], inter_box[6:8]) 119 | 120 | if dist_tl < ho_dis_thresh and dist_tr < ho_dis_thresh \ 121 | and dist_bl < ho_dis_thresh and dist_br < ho_dis_thresh: 122 | prediction_HO[j-1] = a[6] 123 | 124 | prediction_HO = apply_prior(O_ins, prediction_HO) 125 | prediction_HO = prediction_HO * prior_mask[:, O_ins[4]].reshape(1, 29) 126 | This_Score_obj = np.concatenate((O_ins[2].reshape(1, 4), prediction_HO * np.max(o_score)), axis=1) 127 | Score_obj = np.concatenate((Score_obj, This_Score_obj), axis=0) 128 | 129 | # There is only a single human detected in this image. I just ignore it. Might be better to add Nan as object box. 
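# Each row of Score_obj is [x1, y1, x2, y2, score_act_1, ..., score_act_29]:
# the object box followed by its 29 per-action scores, already weighted by
# the object confidence and the prior mask. np.argmax(Score_obj, 0)[4:] below
# then picks, for each action, the row of the object box scoring highest on it.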
130 | if Score_obj.shape[0] == 0: 131 | continue 132 | 133 | # Find the object box associated with the highest action score 134 | max_idx = np.argmax(Score_obj, 0)[4:] 135 | 136 | # agent mAP 137 | for i in range(29): 138 | # ''' 139 | # walk, smile, run, stand 140 | if (i == 3) or (i == 17) or (i == 22) or (i == 27): 141 | agent_name = Action_dic_inv[i] + '_agent' 142 | dic[agent_name] = np.max(h_score) * prediction_H[i] 143 | continue 144 | 145 | # cut 146 | if i == 2: 147 | agent_name = 'cut_agent' 148 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[2]][4 + 2], Score_obj[max_idx[4]][4 + 4]) 149 | continue 150 | if i == 4: 151 | continue 152 | 153 | # eat 154 | if i == 9: 155 | agent_name = 'eat_agent' 156 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[9]][4 + 9], Score_obj[max_idx[16]][4 + 16]) 157 | continue 158 | if i == 16: 159 | continue 160 | 161 | # hit 162 | if i == 19: 163 | agent_name = 'hit_agent' 164 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[19]][4 + 19], Score_obj[max_idx[20]][4 + 20]) 165 | continue 166 | if i == 20: 167 | continue 168 | 169 | # These 2 classes need to be saved manually because there is a '_' in the action name 170 | if i == 6: 171 | agent_name = 'talk_on_phone_agent' 172 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 173 | continue 174 | 175 | if i == 8: 176 | agent_name = 'work_on_computer_agent' 177 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 178 | continue 179 | 180 | # all the rest 181 | agent_name = Action_dic_inv[i].split("_")[0] + '_agent' 182 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 183 | # ''' 184 | 185 | # role mAP 186 | for i in range(29): 187 | # walk, smile, run, stand. Won't contribute to role mAP 188 | if (i == 3) or (i == 17) or (i == 22) or (i == 27): 189 | dic[Action_dic_inv[i]] = np.append(np.full(4, np.nan).reshape(1, 4), 190 | np.max(h_score) * prediction_H[i]) 191 | continue 192 | 193 | # Impossible to perform this action 194 | if H_ins[4] * Score_obj[max_idx[i]][4 + i] == 0: 195 | dic[Action_dic_inv[i]] = np.append(np.full(4, np.nan).reshape(1, 4), 196 | np.max(h_score) * Score_obj[max_idx[i]][4 + i]) 197 | 198 | # Action with >0 score 199 | else: 200 | dic[Action_dic_inv[i]] = np.append(Score_obj[max_idx[i]][:4], 201 | np.max(h_score) * Score_obj[max_idx[i]][4 + i]) 202 | 203 | detection.append(dic) 204 | 205 | _t['im_detect'].toc() 206 | 207 | print('im_detect: {:d}/{:d} {:.3f}s'.format(count + 1, num_iters, _t['im_detect'].average_time)) 208 | count += 1 209 | 210 | pickle.dump(detection, open(output_file, "wb")) 211 | 212 | 213 | if __name__ == '__main__': 214 | opt = opts().parse() 215 | 216 | human_thres = 0.3 217 | object_thres = 0.1 218 | action_thres = 0.05 219 | 220 | np.random.seed(3) 221 | detection = [] 222 | 223 | DATA_DIR = '/home/wangtiancai/data/vcoco' 224 | 225 | with open(DATA_DIR + '/' + 'prior_mask.pkl', 'rb') as f: 226 | prior_mask = pickle.load(f, encoding='latin1') 227 | with open(DATA_DIR + '/' + 'Test_Faster_RCNN_R-50-PFN_2x_VCOCO.pkl', 'rb') as f: 228 | Test_RCNN = pickle.load(f, encoding='latin1') 229 | 230 | Action_dic = json.load(open(DATA_DIR + '/' + 'action_index.json')) 231 | Action_dic_inv = {y: x for x, y in Action_dic.items()} 232 | 233 | ROOT_DIR = '/home/wangtiancai/data/vcoco/' 234 | 235 | output_file = ROOT_DIR + '/Results/' + 'SS' + '_' + 'HOI' + '.pkl' 236 | 237 | vcocoeval = VCOCOeval(DATA_DIR + '/' + 'vcoco_test.json', 238 | DATA_DIR + '/' + 'instances_vcoco_all_2014.json', 239 | DATA_DIR + '/'
+ 'vcoco_test.ids') 240 | 241 | test(opt, Test_RCNN, prior_mask, Action_dic_inv, output_file, human_thres, object_thres, action_thres, detection) 242 | 243 | vcocoeval._do_eval(output_file, ovr_thresh=0.5) 244 | 245 | -------------------------------------------------------------------------------- /src/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_hourglass_weight.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | MODEL_PATH = '../../models/ExtremeNet_500000.pkl' 6 | OUT_PATH = '../../models/ExtremeNet_500000.pth' 7 | 8 | import torch 9 | state_dict = torch.load(MODEL_PATH) 10 | key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \ 11 | 'r_heats': 'hm_r', 'ct_heats': 'hm_c', \ 12 | 't_regrs': 'reg_t', 'l_regrs': 'reg_l', \ 13 | 'b_regrs': 'reg_b', 'r_regrs': 'reg_r'} 14 | 15 | out = {} 16 | for k in state_dict.keys(): 17 | changed = False 18 | for m in key_map.keys(): 19 | if m in k: 20 | if 'ct_heats' in k and m == 't_heats': 21 | continue 22 | new_k = k.replace(m, key_map[m]) 23 | out[new_k] = state_dict[k] 24 | changed = True 25 | print('replace {} with {}'.format(k, new_k)) 26 | if not changed: 27 | out[k] = state_dict[k] 28 | data = {'epoch': 0, 29 | 'state_dict': out} 30 | torch.save(data, OUT_PATH) 31 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir,
'../../data/coco/annotations/instances_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/person_keypoints_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections.
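# Example invocation (the detection-file path is a placeholder):
#   python reval.py /path/to/detections.pkl --imdb voc_2007_test --nms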
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 
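# Draws one detection on `image`: a filled label strip with the class name and
# its score (rendered as score * 10, rounded), plus the box outline. `bbox` is
# expected as integer [x1, y1, x2, y2] pixel coordinates; the label strip is
# drawn below the box's top edge whenever it would otherwise leave the image.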
39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | --------------------------------------------------------------------------------