├── LICENSE ├── README.md ├── data └── .gitignore ├── exp └── .gitignore ├── experiments └── ctdet_coco_hg.sh ├── models └── .gitignore └── src ├── _init_paths.py ├── apply_prior.py ├── lib ├── datasets │ ├── dataset │ │ ├── coco.py │ │ └── coco_hp.py │ ├── dataset_factory.py │ └── sample │ │ └── ctdet.py ├── detectors │ ├── base_detector.py │ ├── ctdet.py │ └── detector_factory.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── logger.py ├── models │ ├── data_parallel.py │ ├── decode.py │ ├── losses.py │ ├── model.py │ ├── networks │ │ ├── DCNv2 │ │ │ ├── .gitignore │ │ │ ├── LICENSE │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ ├── build_double.py │ │ │ ├── dcn_v2.py │ │ │ ├── dcn_v2_func.py │ │ │ ├── make.sh │ │ │ ├── src │ │ │ │ ├── cuda │ │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.cu │ │ │ │ │ ├── dcn_v2_im2col_cuda_double.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.h │ │ │ │ │ ├── dcn_v2_psroi_pooling_cuda_double.cu │ │ │ │ │ └── dcn_v2_psroi_pooling_cuda_double.h │ │ │ │ ├── dcn_v2.c │ │ │ │ ├── dcn_v2.h │ │ │ │ ├── dcn_v2_cuda.c │ │ │ │ ├── dcn_v2_cuda.h │ │ │ │ ├── dcn_v2_cuda_double.c │ │ │ │ ├── dcn_v2_cuda_double.h │ │ │ │ ├── dcn_v2_double.c │ │ │ │ └── dcn_v2_double.h │ │ │ └── test.py │ │ ├── dlav0.py │ │ ├── large_hourglass.py │ │ ├── msra_resnet.py │ │ ├── pose_dla_dcn.py │ │ ├── py_utils │ │ │ ├── __init__.py │ │ │ ├── _cpools │ │ │ │ ├── .gitignore │ │ │ │ ├── __init__.py │ │ │ │ ├── setup.py │ │ │ │ └── src │ │ │ │ │ ├── bottom_pool.cpp │ │ │ │ │ ├── left_pool.cpp │ │ │ │ │ ├── right_pool.cpp │ │ │ │ │ └── top_pool.cpp │ │ │ ├── data_parallel.py │ │ │ ├── losses.py │ │ │ ├── modules.py │ │ │ ├── scatter_gather.py │ │ │ └── utils.py │ │ └── resnet_dcn.py │ ├── scatter_gather.py │ └── utils.py ├── opts.py ├── trains │ ├── base_trainer.py │ ├── ctdet.py │ └── train_factory.py └── utils │ ├── __init__.py │ ├── ddd_utils.py │ ├── debugger.py │ ├── image.py │ ├── oracle_utils.py │ ├── post_process.py │ └── utils.py ├── test_HOI.py ├── timer.py ├── tools ├── _init_paths.py ├── calc_coco_overlap.py ├── convert_hourglass_weight.py ├── eval_coco.py ├── eval_coco_hp.py ├── reval.py └── vis_pred.py └── vsrl_eval.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Tiancai Wang 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning Human-Object Interaction Detection using Interaction Points 2 | 3 | Created by Tiancai Wang, Tong Yang, Martin Danelljan, Fahad Shahbaz Khan, Xiangyu Zhang, Jian Sun 4 | 5 | Links to our paper: [arXiv](https://arxiv.org/abs/2003.14023) and [CVPR2020](http://openaccess.thecvf.com/content_CVPR_2020/html/Wang_Learning_Human-Object_Interaction_Detection_Using_Interaction_Points_CVPR_2020_paper.html) 6 | 7 | ### Introduction 8 | Understanding interactions between humans and objects is one of the fundamental problems in visual classification and an essential step towards detailed scene understanding. Human-object interaction (HOI) detection strives to localize both the human and the object, as well as to identify the complex interactions between them. Most existing HOI detection approaches are instance-centric, where interactions between all possible human-object pairs are predicted based on appearance features and coarse spatial information. We argue that appearance features alone are insufficient to capture complex human-object interactions. In this paper, we therefore propose a novel fully-convolutional approach that directly detects the interactions between human-object pairs. Our network predicts interaction points, which directly localize and classify the interaction. Paired with the densely predicted interaction vectors, the interactions are associated with human and object detections to obtain final predictions. To the best of our knowledge, we are the first to propose an approach where HOI detection is posed as a keypoint detection and grouping problem. Experiments are performed on two popular benchmarks: V-COCO and HICO-DET. Our approach sets a new state-of-the-art on both datasets. 9 | 10 | ## Installation 11 | - Clone this repository. This repository is mainly based on [CenterNet](https://github.com/xingyizhou/CenterNet) and [iCAN](https://github.com/vt-vl-lab/iCAN). 12 | 13 | ```Shell 14 | IPNet_ROOT=/path/to/clone/IPNet 15 | git clone https://github.com/vaesl/IP-Net $IPNet_ROOT 16 | ``` 17 | - The code was tested on Ubuntu 18.04, with [Anaconda](https://www.anaconda.com/download) Python 3.6 and [PyTorch](http://pytorch.org/) v1.0.1. 18 | NVIDIA GPUs are needed for testing. After installing Anaconda, create a new conda environment, activate it and install PyTorch 1.0.1. 19 | 20 | ```Shell 21 | conda create -n IPNet python=3.6 22 | source activate IPNet 23 | conda install pytorch=1.0.1 torchvision -c pytorch 24 | ``` 25 | 26 | - Install the requirements. 27 | ```Shell 28 | pip3 install -r requirements.txt 29 | ``` 30 | - Compile the center pooling layers.
31 | ```Shell 32 | cd IPNet_ROOT/src/lib/models/networks/py_utils/_cpools/ 33 | python setup.py install --user 34 | ``` 35 | 36 | - Install [COCOAPI](https://github.com/cocodataset/cocoapi): 37 | 38 | ~~~ 39 | # COCOAPI=/path/to/clone/cocoapi 40 | git clone https://github.com/cocodataset/cocoapi.git $COCOAPI 41 | cd $COCOAPI/PythonAPI 42 | make 43 | python setup.py install --user 44 | ~~~ 45 | 46 | ## Download 47 | To evaluate the performance reported in the paper, the V-COCO and HICO-DET datasets as well as our trained models need to be downloaded. 48 | 49 | ### V-COCO and HICO-DET Datasets 50 | To download the datasets and set up the evaluation API, please follow [iCAN](https://github.com/vt-vl-lab/iCAN). 51 | 52 | ### Trained Models 53 | Please access [Google Drive](https://drive.google.com/file/d/1stBqpTncUFfl-naKn4NONRmC-89jtdyh/view?usp=sharing) 54 | to obtain our trained models for V-COCO and put the models into the corresponding directory (e.g. '~/weights/V-COCO/'). 55 | Note that we only release models for V-COCO for the time being. 56 | 57 | ## Evaluation 58 | To reproduce the performance reported in the paper, simply run: 59 | 60 | ```Shell 61 | python3 test_HOI.py ctdet --exp_id coco_hg --fix_res --arch hourglass --flip_test --load_model /path/to/model/weights 62 | ``` 63 | 64 | ## Citation 65 | Please cite our paper in your publications if it helps your research: 66 | 67 | @inproceedings{Wang2020IPNet, 68 | title = {Learning Human-Object Interaction Detection using Interaction Points}, 69 | author = {Tiancai Wang and Tong Yang and Martin Danelljan and Fahad Shahbaz Khan and Xiangyu Zhang and Jian Sun}, 70 | booktitle = {CVPR}, 71 | year = {2020} 72 | } 73 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /exp/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /experiments/ctdet_coco_hg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src 3 | # train 4 | python main.py ctdet --exp_id coco_hg --arch hourglass --batch_size 24 --master_batch 4 --lr 2.5e-4 --load_model ../models/ExtremeNet_500000.pth --gpus 0,1,2,3,4 5 | # test 6 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume 7 | # flip test 8 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test 9 | # multi scale test 10 | python test.py ctdet --exp_id coco_hg --arch hourglass --keep_res --resume --flip_test --test_scales 0.5,0.75,1,1.25,1.5 11 | cd ..
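# Note (added commentary, a sketch of how the flags above interact): with
# --batch_size 24, --master_batch 4 and --gpus 0,1,2,3,4, the custom
# DataParallel in src/lib/models/data_parallel.py splits each batch unevenly:
# GPU 0 receives 4 samples and the remaining 20 are spread over GPUs 1-4
# (5 each), leaving extra memory on GPU 0, where the outputs are gathered
# and the loss is computed.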
-------------------------------------------------------------------------------- /models/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !.gitignore 3 | -------------------------------------------------------------------------------- /src/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/apply_prior.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow iCAN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Chen Gao 5 | # -------------------------------------------------------- 6 | 7 | def apply_prior(Object, prediction): 8 | 9 | 10 | if Object[4] != 32: # not a snowboard, then the action is impossible to be snowboard 11 | prediction[21] = 0 12 | 13 | if Object[4] != 74: # not a book, then the action is impossible to be read 14 | prediction[24] = 0 15 | 16 | if Object[4] != 33: # not a sports ball, then the action is impossible to be kick 17 | prediction[7] = 0 18 | 19 | if (Object[4] != 41) and (Object[4] != 40) and (Object[4] != 42) and (Object[4] != 46): # not 'wine glass', 'bottle', 'cup', 'bowl', then the action is impossible to be drink 20 | prediction[13] = 0 21 | 22 | if Object[4] != 37: # not a skateboard, then the action is impossible to be skateboard 23 | prediction[26] = 0 24 | 25 | if Object[4] != 38: # not a surfboard, then the action is impossible to be surfboard 26 | prediction[0] = 0 27 | 28 | if Object[4] != 31: # not a ski, then the action is impossible to be ski 29 | prediction[1] = 0 30 | 31 | if Object[4] != 64: # not a laptop, then the action is impossible to be work on computer 32 | prediction[8] = 0 33 | 34 | if (Object[4] != 77) and (Object[4] != 43) and (Object[4] != 44): # not 'scissors', 'fork', 'knife', then the action is impossible to be cut_instr 35 | prediction[2] = 0 36 | 37 | if (Object[4] != 33) and (Object[4] != 30): # not 'sports ball', 'frisbee', then the action is impossible to be throw and catch 38 | prediction[15] = 0 39 | prediction[28] = 0 40 | 41 | if Object[4] != 68: # not a cellphone, then the action is impossible to be talk_on_phone 42 | prediction[6] = 0 43 | 44 | if (Object[4] != 14) and (Object[4] != 61) and (Object[4] != 62) and (Object[4] != 60) and (Object[4] != 58) and (Object[4] != 57): # not 'bench', 'dining table', 'toilet', 'bed', 'couch', 'chair', then the action is impossible to be lay 45 | prediction[12] = 0 46 | 47 | if (Object[4] != 32) and (Object[4] != 31) and (Object[4] != 37) and (Object[4] != 38): # not 'snowboard', 'skis', 'skateboard', 'surfboard', then the action is impossible to be jump 48 | prediction[11] = 0 49 | 50 | if (Object[4] != 47) and (Object[4] != 48) and (Object[4] != 49) and (Object[4] != 50) and (Object[4] != 51) and (Object[4] != 52) and (Object[4] != 53) and (Object[4] != 54) and (Object[4] != 55) and (Object[4] != 56): # not 'banana', 'apple', 'sandwich', 'orange', 'carrot', 'broccoli', 'hot dog', 'pizza', 'cake', 'donut', then the action is impossible to be eat_obj 51 | prediction[9] = 0 52 | 53 | if (Object[4]
!= 43) and (Object[4] != 44) and (Object[4] != 45): # not 'fork', 'knife', 'spoon', then the action is impossible to be eat_instr 54 | prediction[16] = 0 55 | 56 | if (Object[4] != 39) and (Object[4] != 35): # not 'tennis racket', 'baseball bat', then the action is impossible to be hit_instr 57 | prediction[19] = 0 58 | 59 | if (Object[4] != 33): # not 'sports ball', then the action is impossible to be hit_obj 60 | prediction[20] = 0 61 | 62 | 63 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 6) and (Object[4] != 8) and (Object[4] != 9) and (Object[4] != 7) and (Object[4] != 5) and (Object[4] != 3) and (Object[4] != 18) and (Object[4] != 21): # not 'bicycle', 'motorcycle', 'bus', 'truck', 'boat', 'train', 'airplane', 'car', 'horse', 'elephant', then the action is impossible to be ride 64 | prediction[5] = 0 65 | 66 | if (Object[4] != 2) and (Object[4] != 4) and (Object[4] != 18) and (Object[4] != 21) and (Object[4] != 14) and (Object[4] != 57) and (Object[4] != 58) and (Object[4] != 60) and (Object[4] != 62) and (Object[4] != 61) and (Object[4] != 29) and (Object[4] != 27) and (Object[4] != 25): # not 'bicycle', 'motorcycle', 'horse', 'elephant', 'bench', 'chair', 'couch', 'bed', 'toilet', 'dining table', 'suitcase', 'handbag', 'backpack', then the action is impossible to be sit 67 | prediction[10] = 0 68 | 69 | if (Object[4] == 1): 70 | prediction[4] = 0 71 | 72 | return prediction 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | 14 | class COCO(data.Dataset): 15 | num_obj_classes = 80 16 | num_act_classes = 29 17 | default_resolution = [512, 512] 18 | mean = np.array([0.40789654, 0.44719302, 0.47026115], dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(COCO, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'vcoco') 24 | if split == 'test': 25 | self.annot_path = os.path.join( 26 | self.data_dir, 'annotations', 'instances_vcoco_test2014.json') 27 | else: 28 | self.annot_path = os.path.join( 29 | self.data_dir, 'annotations', 'instances_hoi_action_point_iCAN.json') 30 | self.max_objs = 128 31 | self.class_name = [ 32 | '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 33 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 34 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 35 | 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 36 | 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 37 | 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 38 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 39 | 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 40 | 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 41 | 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 42 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 43 | 'oven', 'toaster', 'sink', 'refrigerator',
'book', 'clock', 'vase', 44 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] 45 | self._valid_ids = [ 46 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 47 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 48 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 49 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 50 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 51 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 52 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 53 | 82, 84, 85, 86, 87, 88, 89, 90] 54 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 55 | self.voc_color = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32) \ 56 | for v in range(1, self.num_obj_classes + 1)] 57 | self._data_rng = np.random.RandomState(123) 58 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 59 | dtype=np.float32) 60 | self._eig_vec = np.array([ 61 | [-0.58752847, -0.69563484, 0.41340352], 62 | [-0.5832747, 0.00994535, -0.81221408], 63 | [-0.56089297, 0.71832671, 0.41158938] 64 | ], dtype=np.float32) 65 | # self.mean = np.array([0.485, 0.456, 0.406], np.float32).reshape(1, 1, 3) 66 | # self.std = np.array([0.229, 0.224, 0.225], np.float32).reshape(1, 1, 3) 67 | 68 | self.split = split 69 | self.opt = opt 70 | 71 | print('==> initializing coco 2014 {} data.'.format(split)) 72 | self.coco = coco.COCO(self.annot_path) 73 | self.images = self.coco.getImgIds() 74 | self.num_samples = len(self.images) 75 | 76 | print('Loaded {} {} samples'.format(split, self.num_samples)) 77 | 78 | def _to_float(self, x): 79 | return float("{:.2f}".format(x)) 80 | 81 | def convert_eval_format(self, all_bboxes): 82 | # import pdb; pdb.set_trace() 83 | detections = [] 84 | for image_id in all_bboxes: 85 | for cls_ind in all_bboxes[image_id]: 86 | category_id = self._valid_ids[cls_ind - 1] 87 | for bbox in all_bboxes[image_id][cls_ind]: 88 | bbox[2] -= bbox[0] 89 | bbox[3] -= bbox[1] 90 | score = bbox[4] 91 | bbox_out = list(map(self._to_float, bbox[0:4])) 92 | 93 | detection = { 94 | "image_id": int(image_id), 95 | "category_id": int(category_id), 96 | "bbox": bbox_out, 97 | "score": float("{:.2f}".format(score)) 98 | } 99 | if len(bbox) > 5: 100 | extreme_points = list(map(self._to_float, bbox[5:13])) 101 | detection["extreme_points"] = extreme_points 102 | detections.append(detection) 103 | return detections 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def save_results(self, results, save_dir): 109 | json.dump(self.convert_eval_format(results), 110 | open('{}/results.json'.format(save_dir), 'w')) 111 | 112 | def run_eval(self, results, save_dir): 113 | # result_json = os.path.join(save_dir, "results.json") 114 | # detections = self.convert_eval_format(results) 115 | # json.dump(detections, open(result_json, "w")) 116 | self.save_results(results, save_dir) 117 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 118 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 119 | coco_eval.evaluate() 120 | coco_eval.accumulate() 121 | coco_eval.summarize() 122 | -------------------------------------------------------------------------------- /src/lib/datasets/dataset/coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class COCOHP(data.Dataset): 14 | num_classes = 
1 15 | num_joints = 17 16 | default_resolution = [512, 512] 17 | mean = np.array([0.40789654, 0.44719302, 0.47026115], 18 | dtype=np.float32).reshape(1, 1, 3) 19 | std = np.array([0.28863828, 0.27408164, 0.27809835], 20 | dtype=np.float32).reshape(1, 1, 3) 21 | flip_idx = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], 22 | [11, 12], [13, 14], [15, 16]] 23 | def __init__(self, opt, split): 24 | super(COCOHP, self).__init__() 25 | self.edges = [[0, 1], [0, 2], [1, 3], [2, 4], 26 | [4, 6], [3, 5], [5, 6], 27 | [5, 7], [7, 9], [6, 8], [8, 10], 28 | [6, 12], [5, 11], [11, 12], 29 | [12, 14], [14, 16], [11, 13], [13, 15]] 30 | 31 | self.acc_idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16] 32 | self.data_dir = os.path.join(opt.data_dir, 'coco') 33 | self.img_dir = os.path.join(self.data_dir, '{}2017'.format(split)) 34 | if split == 'test': 35 | self.annot_path = os.path.join( 36 | self.data_dir, 'annotations', 37 | 'image_info_test-dev2017.json').format(split) 38 | else: 39 | self.annot_path = os.path.join( 40 | self.data_dir, 'annotations', 41 | 'person_keypoints_{}2017.json').format(split) 42 | self.max_objs = 32 43 | self._data_rng = np.random.RandomState(123) 44 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 45 | dtype=np.float32) 46 | self._eig_vec = np.array([ 47 | [-0.58752847, -0.69563484, 0.41340352], 48 | [-0.5832747, 0.00994535, -0.81221408], 49 | [-0.56089297, 0.71832671, 0.41158938] 50 | ], dtype=np.float32) 51 | self.split = split 52 | self.opt = opt 53 | 54 | print('==> initializing coco 2017 {} data.'.format(split)) 55 | self.coco = coco.COCO(self.annot_path) 56 | image_ids = self.coco.getImgIds() 57 | 58 | if split == 'train': 59 | self.images = [] 60 | for img_id in image_ids: 61 | idxs = self.coco.getAnnIds(imgIds=[img_id]) 62 | if len(idxs) > 0: 63 | self.images.append(img_id) 64 | else: 65 | self.images = image_ids 66 | self.num_samples = len(self.images) 67 | print('Loaded {} {} samples'.format(split, self.num_samples)) 68 | 69 | def _to_float(self, x): 70 | return float("{:.2f}".format(x)) 71 | 72 | def convert_eval_format(self, all_bboxes): 73 | # import pdb; pdb.set_trace() 74 | detections = [] 75 | for image_id in all_bboxes: 76 | for cls_ind in all_bboxes[image_id]: 77 | category_id = 1 78 | for dets in all_bboxes[image_id][cls_ind]: 79 | bbox = dets[:4] 80 | bbox[2] -= bbox[0] 81 | bbox[3] -= bbox[1] 82 | score = dets[4] 83 | bbox_out = list(map(self._to_float, bbox)) 84 | keypoints = np.concatenate([ 85 | np.array(dets[5:39], dtype=np.float32).reshape(-1, 2), 86 | np.ones((17, 1), dtype=np.float32)], axis=1).reshape(51).tolist() 87 | keypoints = list(map(self._to_float, keypoints)) 88 | 89 | detection = { 90 | "image_id": int(image_id), 91 | "category_id": int(category_id), 92 | "bbox": bbox_out, 93 | "score": float("{:.2f}".format(score)), 94 | "keypoints": keypoints 95 | } 96 | detections.append(detection) 97 | return detections 98 | 99 | def __len__(self): 100 | return self.num_samples 101 | 102 | def save_results(self, results, save_dir): 103 | json.dump(self.convert_eval_format(results), 104 | open('{}/results.json'.format(save_dir), 'w')) 105 | 106 | 107 | def run_eval(self, results, save_dir): 108 | # result_json = os.path.join(opt.save_dir, "results.json") 109 | # detections = convert_eval_format(all_boxes) 110 | # json.dump(detections, open(result_json, "w")) 111 | self.save_results(results, save_dir) 112 | coco_dets = self.coco.loadRes('{}/results.json'.format(save_dir)) 113 | coco_eval = COCOeval(self.coco, coco_dets, "keypoints") 
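# (added commentary) run_eval makes two passes over the same results file:
# the COCOeval constructed above scores OKS-based keypoint AP, and a second
# COCOeval further below re-scores the same detections for bbox AP.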
114 | coco_eval.evaluate() 115 | coco_eval.accumulate() 116 | coco_eval.summarize() 117 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 118 | coco_eval.evaluate() 119 | coco_eval.accumulate() 120 | coco_eval.summarize() -------------------------------------------------------------------------------- /src/lib/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .sample.ctdet import CTDetDataset 6 | 7 | from .dataset.coco import COCO 8 | from .dataset.coco_hp import COCOHP 9 | 10 | 11 | dataset_factory = { 12 | 'coco': COCO, 13 | 'coco_hp': COCOHP 14 | } 15 | 16 | _sample_factory = { 17 | 'ctdet': CTDetDataset, 18 | } 19 | 20 | 21 | def get_dataset(dataset, task): 22 | class Dataset(dataset_factory[dataset], _sample_factory[task]): 23 | pass 24 | return Dataset 25 | -------------------------------------------------------------------------------- /src/lib/datasets/sample/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch.utils.data as data 6 | import numpy as np 7 | import torch 8 | import json 9 | import cv2 10 | import os 11 | from utils.image import flip, color_aug 12 | from utils.image import get_affine_transform, affine_transform 13 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 14 | from utils.image import draw_dense_reg 15 | import math 16 | import boto3 17 | import io 18 | from PIL import Image 19 | 20 | host = "http://oss.wuhu-a.brainpp.cn" 21 | s3_client = boto3.client('s3', endpoint_url=host) 22 | 23 | 24 | class CTDetDataset(data.Dataset): 25 | def _coco_box_to_bbox(self, box): 26 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 27 | dtype=np.float32) 28 | return bbox 29 | 30 | def _get_border(self, border, size): 31 | i = 1 32 | while size - border // i <= border // i: 33 | i *= 2 34 | return border // i 35 | 36 | def __getitem__(self, index): 37 | img_id = self.images[index] 38 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 39 | img_dir = 'coco/images/trainval2017/' 40 | img_path = os.path.join(img_dir, file_name) 41 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 42 | anns = self.coco.loadAnns(ids=ann_ids) 43 | num_objs = min(len(anns), self.max_objs) 44 | 45 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path) 46 | img = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32) 47 | 48 | height, width = img.shape[0], img.shape[1] 49 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 50 | if self.opt.keep_res: 51 | input_h = (height | self.opt.pad) + 1 52 | input_w = (width | self.opt.pad) + 1 53 | s = np.array([input_w, input_h], dtype=np.float32) 54 | else: 55 | s = max(img.shape[0], img.shape[1]) * 1.0 56 | input_h, input_w = self.opt.input_h, self.opt.input_w 57 | 58 | flipped = False 59 | if self.split == 'train': 60 | if not self.opt.not_rand_crop: 61 | s = s * np.random.choice(np.arange(0.6, 1.4, 0.1)) 62 | w_border = self._get_border(128, img.shape[1]) 63 | h_border = self._get_border(128, img.shape[0]) 64 | c[0] = np.random.randint(low=w_border, high=img.shape[1] - w_border) 65 | c[1] = np.random.randint(low=h_border, high=img.shape[0] - h_border) 66 | else: 67 | sf
= self.opt.scale 68 | cf = self.opt.shift 69 | c[0] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 70 | c[1] += s * np.clip(np.random.randn()*cf, -2*cf, 2*cf) 71 | s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) 72 | 73 | if np.random.random() < self.opt.flip: 74 | flipped = True 75 | # print(img.shape) 76 | img = img[:, ::-1, :] 77 | c[0] = width - c[0] - 1 78 | 79 | trans_input = get_affine_transform( 80 | c, s, 0, [input_w, input_h]) 81 | inp = cv2.warpAffine(img, trans_input, 82 | (input_w, input_h), 83 | flags=cv2.INTER_LINEAR) 84 | inp = (inp.astype(np.float32) / 255.) 85 | if self.split == 'train' and not self.opt.no_color_aug: 86 | color_aug(self._data_rng, inp, self._eig_val, self._eig_vec) 87 | inp = (inp - self.mean) / self.std 88 | inp = inp.transpose(2, 0, 1) 89 | 90 | output_h = input_h // self.opt.down_ratio 91 | output_w = input_w // self.opt.down_ratio 92 | num_obj_classes = self.num_obj_classes 93 | num_act_classes = self.num_act_classes 94 | trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) 95 | 96 | hm = np.zeros((num_obj_classes, output_h, output_w), dtype=np.float32) 97 | hm_act = np.zeros((num_act_classes, output_h, output_w), dtype=np.float32) 98 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 99 | wh_act = np.zeros((self.max_objs, 2), dtype=np.float32) 100 | dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) 101 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 102 | ind = np.zeros((self.max_objs), dtype=np.int64) 103 | ind_act = np.zeros((self.max_objs), dtype=np.int64) 104 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 105 | reg_act_mask = np.zeros((self.max_objs), dtype=np.uint8) 106 | cat_spec_wh = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.float32) 107 | cat_spec_mask = np.zeros((self.max_objs, num_obj_classes * 2), dtype=np.uint8) 108 | 109 | draw_gaussian = draw_msra_gaussian if self.opt.mse_loss else \ 110 | draw_umich_gaussian 111 | 112 | gt_det = [] 113 | 114 | p = 0 115 | 116 | for k in range(num_objs): 117 | ann = anns[k] 118 | bbox = self._coco_box_to_bbox(ann['bbox']) 119 | cls_id = int(self.cat_ids[ann['category_id']]) 120 | 121 | if flipped: 122 | bbox[[0, 2]] = width - bbox[[2, 0]] - 1 123 | bbox[:2] = affine_transform(bbox[:2], trans_output) 124 | bbox[2:] = affine_transform(bbox[2:], trans_output) 125 | bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1) 126 | bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1) 127 | 128 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 129 | if h > 0 and w > 0: 130 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 131 | radius = max(0, int(radius)) 132 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius 133 | ct = np.array( 134 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 135 | ct_int = ct.astype(np.int32) 136 | draw_gaussian(hm[cls_id], ct_int, radius) 137 | wh[k] = 1. * w, 1. * h 138 | ind[k] = ct_int[1] * output_w + ct_int[0] 139 | reg[k] = ct - ct_int 140 | reg_mask[k] = 1 141 | cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k] 142 | cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1 143 | if self.opt.dense_wh: 144 | draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) 145 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 146 | ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) 147 | 148 | if ann['category_id'] == 1: 149 | if len(ann['bbox']) != 4: 150 | for cls_id in ann['bbox'][4:]: 151 | draw_gaussian(hm_act[cls_id], ct_int, radius) 152 | 153 | # h_act = h, w_act = w 154 | # wh_act[p] = 1. 
* w, 1. * h 155 | # ind_act[p] = ct_int[1] * output_w + ct_int[0] 156 | # reg_act_mask[p] = 1 157 | # p += 1 158 | 159 | if ann['obj_bbox'] != []: 160 | for i, obbox in enumerate(ann['obj_bbox']): 161 | o_bbox = self._coco_box_to_bbox(obbox[:4]) 162 | o_act = obbox[4:] 163 | o_bbox = np.array(o_bbox) 164 | if flipped: 165 | o_bbox[[0, 2]] = width - o_bbox[[2, 0]] - 1 166 | o_bbox[:2] = affine_transform(o_bbox[:2], trans_output) 167 | o_bbox[2:] = affine_transform(o_bbox[2:], trans_output) 168 | o_bbox[[0, 2]] = np.clip(o_bbox[[0, 2]], 0, output_w - 1) 169 | o_bbox[[1, 3]] = np.clip(o_bbox[[1, 3]], 0, output_h - 1) 170 | 171 | o_h, o_w = o_bbox[3] - o_bbox[1], o_bbox[2] - o_bbox[0] 172 | 173 | if o_h > 0 and o_w > 0: 174 | # radius = gaussian_radius((math.ceil(o_h), math.ceil(o_w))) 175 | # radius = max(0, int(radius)) 176 | radius = 10 177 | radius = self.opt.hm_gauss if self.opt.mse_loss else radius 178 | 179 | o_ct = np.array( 180 | [(o_bbox[0] + o_bbox[2]) / 2, (o_bbox[1] + o_bbox[3]) / 2], dtype=np.float32) 181 | act_ct = (ct + o_ct) / 2 182 | act_ct_int = act_ct.astype(np.int32) 183 | 184 | h_act, w_act = abs(ct[1] - o_ct[1]), abs(ct[0] - o_ct[0]) 185 | wh_act[p] = 1. * w_act, 1. * h_act 186 | ind_act[p] = act_ct_int[1] * output_w + act_ct_int[0] 187 | reg_act_mask[p] = 1 188 | p += 1 189 | 190 | for cls_id in o_act: 191 | draw_gaussian(hm_act[cls_id], act_ct_int, radius) 192 | 193 | ret = {'input': inp, 'hm_act': hm_act, 'wh_act':wh_act, 'ind_act':ind_act, 'reg_act_mask':reg_act_mask} 194 | 195 | if self.opt.dense_wh: 196 | hm_a = hm.max(axis=0, keepdims=True) 197 | dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) 198 | ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) 199 | ret.pop('wh', None) # 'wh' is never added to ret in this HOI variant; pop avoids a KeyError 200 | elif self.opt.cat_spec_wh: 201 | ret.update({'cat_spec_wh': cat_spec_wh, 'cat_spec_mask': cat_spec_mask}) 202 | ret.pop('wh', None) 203 | if self.opt.reg_offset: 204 | ret.update({'reg': reg}) 205 | if self.opt.debug > 0 or not self.split == 'train': 206 | gt_det = np.array(gt_det, dtype=np.float32) if len(gt_det) > 0 else \ 207 | np.zeros((1, 6), dtype=np.float32) 208 | meta = {'c': c, 's': s, 'gt_det': gt_det, 'img_id': img_id} 209 | ret['meta'] = meta 210 | return ret 211 | -------------------------------------------------------------------------------- /src/lib/detectors/base_detector.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | from models.model import create_model, load_model 12 | from utils.image import get_affine_transform 13 | from utils.debugger import Debugger 14 | 15 | import boto3 16 | import io 17 | from PIL import Image 18 | 19 | host = "http://oss.wuhu-a.brainpp.cn" 20 | s3_client = boto3.client('s3', endpoint_url=host) 21 | 22 | 23 | class BaseDetector(object): 24 | def __init__(self, opt): 25 | if opt.gpus[0] >= 0: 26 | opt.device = torch.device('cuda') 27 | else: 28 | opt.device = torch.device('cpu') 29 | 30 | print('Creating model...') 31 | self.model = create_model(opt.arch, opt.heads, opt.head_conv) 32 | self.model = load_model(self.model, opt.load_model) 33 | self.model = self.model.to(opt.device) 34 | self.model.eval() 35 | 36 | self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) 37 | self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) 38 | self.max_per_image = 100
39 | self.num_obj_classes = opt.num_obj_classes 40 | self.num_act_classes = opt.num_act_classes 41 | self.scales = opt.test_scales 42 | self.opt = opt 43 | self.pause = True 44 | 45 | def pre_process(self, image, scale, meta=None): 46 | height, width = image.shape[0:2] 47 | new_height = int(height * scale) 48 | new_width = int(width * scale) 49 | if self.opt.fix_res: 50 | inp_height, inp_width = self.opt.input_h, self.opt.input_w 51 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 52 | s = max(height, width) * 1.0 53 | else: 54 | inp_height = (new_height | self.opt.pad) + 1 55 | inp_width = (new_width | self.opt.pad) + 1 56 | c = np.array([new_width // 2, new_height // 2], dtype=np.float32) 57 | s = np.array([inp_width, inp_height], dtype=np.float32) 58 | 59 | trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height]) 60 | resized_image = cv2.resize(image, (new_width, new_height)) 61 | inp_image = cv2.warpAffine( 62 | resized_image, trans_input, (inp_width, inp_height), 63 | flags=cv2.INTER_LINEAR) 64 | inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32) 65 | 66 | images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width) 67 | if self.opt.flip_test: 68 | images = np.concatenate((images, images[:, :, :, ::-1]), axis=0) 69 | images = torch.from_numpy(images) 70 | meta = {'c': c, 's': s, 71 | 'out_height': inp_height // self.opt.down_ratio, 72 | 'out_width': inp_width // self.opt.down_ratio} 73 | return images, meta 74 | 75 | def process(self, images, return_time=False): 76 | raise NotImplementedError 77 | 78 | def post_process(self, dets_act, meta, scale=1): 79 | raise NotImplementedError 80 | 81 | def merge_outputs(self, detections): 82 | raise NotImplementedError 83 | 84 | def debug(self, debugger, images, dets, output, scale=1): 85 | raise NotImplementedError 86 | 87 | def show_results(self, debugger, image, results): 88 | raise NotImplementedError 89 | 90 | def run(self, img_path, meta=None): 91 | pre_processed = False 92 | # if isinstance(image_or_path_or_tensor, np.ndarray): 93 | # image = image_or_path_or_tensor 94 | # elif type(image_or_path_or_tensor) == type (''): 95 | # image = cv2.imread(image_or_path_or_tensor) 96 | # else: 97 | # image = image_or_path_or_tensor['image'][0].numpy() 98 | # pre_processed_images = image_or_path_or_tensor 99 | # pre_processed = True 100 | 101 | img_obj = s3_client.get_object(Bucket="wangtiancai", Key=img_path) 102 | image = np.array(Image.open(io.BytesIO(img_obj['Body'].read())).convert('RGB'), dtype=np.float32) 103 | 104 | results = [] 105 | for scale in self.scales: 106 | scale_start_time = time.time() 107 | if not pre_processed: 108 | images, meta = self.pre_process(image, scale, meta) 109 | else: 110 | # import pdb; pdb.set_trace() 111 | images = pre_processed_images['images'][scale][0] 112 | meta = pre_processed_images['meta'][scale] 113 | meta = {k: v.numpy()[0] for k, v in meta.items()} 114 | images = images.to(self.opt.device) 115 | 116 | # print(images.shape) 117 | 118 | output, dets_act, forward_time = self.process(images, return_time=True) 119 | 120 | dets_act = self.post_process(dets_act, meta, scale) 121 | 122 | results.append(dets_act) 123 | 124 | return results 125 | -------------------------------------------------------------------------------- /src/lib/detectors/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import 
print_function 4 | 5 | import cv2 6 | import numpy as np 7 | from progress.bar import Bar 8 | import time 9 | import torch 10 | 11 | # from external.nms import soft_nms 12 | from models.decode import ctdet_decode 13 | from models.utils import flip_tensor 14 | from utils.image import get_affine_transform 15 | from utils.post_process import ctdet_post_process 16 | from utils.debugger import Debugger 17 | 18 | from .base_detector import BaseDetector 19 | 20 | 21 | class CtdetDetector(BaseDetector): 22 | def __init__(self, opt): 23 | super(CtdetDetector, self).__init__(opt) 24 | 25 | def process(self, images, return_time=False): 26 | with torch.no_grad(): 27 | output = self.model(images)[-1] 28 | hm_act = output['hm_act_f'].sigmoid_() 29 | reg_act = None 30 | wh_act = output['wh_act'] 31 | if self.opt.flip_test: 32 | hm_act = (hm_act[0:1] + flip_tensor(hm_act[1:2])) / 2 33 | wh_act = (wh_act[0:1] + flip_tensor(wh_act[1:2])) / 2 34 | torch.cuda.synchronize() 35 | forward_time = time.time() 36 | dets_act = ctdet_decode(hm_act, wh_act, reg_act=reg_act, K=self.opt.K) 37 | 38 | if return_time: 39 | return output, dets_act, forward_time 40 | else: 41 | return output, dets_act 42 | 43 | 44 | def post_process(self, dets_act, meta, scale=1): 45 | dets_act = dets_act.detach().cpu().numpy() 46 | 47 | dets_act = dets_act.reshape(1, -1, dets_act.shape[2]) 48 | 49 | dets_act = ctdet_post_process( 50 | dets_act.copy(), [meta['c']], [meta['s']], 51 | meta['out_height'], meta['out_width'], self.opt.num_obj_classes, self.opt.num_act_classes) 52 | # print(dets_act) 53 | 54 | # for j in range(1, self.num_obj_classes + 1): 55 | # dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 56 | # dets[0][j][:, :4] /= scale 57 | for j in range(1, self.num_act_classes + 1): 58 | dets_act[0][j] = np.array(dets_act[0][j], dtype=np.float32).reshape(-1, 7) 59 | dets_act[0][j][:, :6] /= scale 60 | 61 | # print(dets_act[0]) 62 | return dets_act[0] 63 | 64 | def merge_outputs(self, detections): 65 | results = {} 66 | for j in range(1, self.num_obj_classes + 1): 67 | results[j] = np.concatenate( 68 | [detection[j] for detection in detections], axis=0).astype(np.float32) 69 | if len(self.scales) > 1 or self.opt.nms: 70 | soft_nms(results[j], Nt=0.5, method=2) 71 | scores = np.hstack( 72 | [results[j][:, 4] for j in range(1, self.num_obj_classes + 1)]) 73 | if len(scores) > self.max_per_image: 74 | kth = len(scores) - self.max_per_image 75 | thresh = np.partition(scores, kth)[kth] 76 | for j in range(1, self.num_obj_classes + 1): 77 | keep_inds = (results[j][:, 4] >= thresh) 78 | results[j] = results[j][keep_inds] 79 | return results 80 | 81 | def debug(self, debugger, images, dets, output, scale=1): 82 | detection = dets.detach().cpu().numpy().copy() 83 | detection[:, :, :4] *= self.opt.down_ratio 84 | for i in range(1): 85 | img = images[i].detach().cpu().numpy().transpose(1, 2, 0) 86 | img = ((img * self.std + self.mean) * 255).astype(np.uint8) 87 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 88 | debugger.add_blend_img(img, pred, 'pred_hm_{:.1f}'.format(scale)) 89 | debugger.add_img(img, img_id='out_pred_{:.1f}'.format(scale)) 90 | for k in range(len(dets[i])): 91 | if detection[i, k, 4] > self.opt.center_thresh: 92 | debugger.add_coco_bbox(detection[i, k, :4], detection[i, k, -1], 93 | detection[i, k, 4], 94 | img_id='out_pred_{:.1f}'.format(scale)) 95 | 96 | def show_results(self, debugger, image, results): 97 | debugger.add_img(image, img_id='ctdet') 98 | for j in range(1, 
self.num_obj_classes + 1): 99 | for bbox in results[j]: 100 | if bbox[4] > self.opt.vis_thresh: 101 | debugger.add_coco_bbox(bbox[:4], j - 1, bbox[4], img_id='ctdet') 102 | debugger.show_all_imgs(pause=self.pause) 103 | -------------------------------------------------------------------------------- /src/lib/detectors/detector_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetDetector 6 | 7 | detector_factory = { 8 | 'ctdet': CtdetDetector 9 | } 10 | -------------------------------------------------------------------------------- /src/lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /src/lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /src/lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/external/__init__.py -------------------------------------------------------------------------------- /src/lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /src/lib/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | USE_TENSORBOARD = True 11 | try: 12 | import tensorboardX 13 | print('Using tensorboardX') 14 | except: 15 | USE_TENSORBOARD = False 16 | 17 | class Logger(object): 18 | def __init__(self, opt): 19 | """Create a summary writer logging to log_dir.""" 20 | if not os.path.exists(opt.save_dir): 21 | os.makedirs(opt.save_dir) 22 | if not os.path.exists(opt.debug_dir): 23 | os.makedirs(opt.debug_dir) 24 | 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | 27 | args = dict((name, getattr(opt, name)) for name in dir(opt) 28 | if not name.startswith('_')) 29 | file_name = os.path.join(opt.save_dir, 'opt.txt') 30 | with open(file_name, 'wt') as opt_file: 31 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 32 | opt_file.write('==> cudnn version: {}\n'.format( 33 | torch.backends.cudnn.version())) 34 | opt_file.write('==> Cmd:\n') 35 | opt_file.write(str(sys.argv)) 
36 | opt_file.write('\n==> Opt:\n') 37 | for k, v in sorted(args.items()): 38 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 39 | 40 | log_dir = opt.save_dir + '/logs_{}'.format(time_str) 41 | if USE_TENSORBOARD: 42 | self.writer = tensorboardX.SummaryWriter(log_dir=log_dir) 43 | else: 44 | if not os.path.exists(os.path.dirname(log_dir)): 45 | os.mkdir(os.path.dirname(log_dir)) 46 | if not os.path.exists(log_dir): 47 | os.mkdir(log_dir) 48 | self.log = open(log_dir + '/log.txt', 'w') 49 | try: 50 | os.system('cp {}/opt.txt {}/'.format(opt.save_dir, log_dir)) 51 | except: 52 | pass 53 | self.start_line = True 54 | 55 | def write(self, txt): 56 | if self.start_line: 57 | time_str = time.strftime('%Y-%m-%d-%H-%M') 58 | self.log.write('{}: {}'.format(time_str, txt)) 59 | else: 60 | self.log.write(txt) 61 | self.start_line = False 62 | if '\n' in txt: 63 | self.start_line = True 64 | self.log.flush() 65 | 66 | def close(self): 67 | self.log.close() 68 | 69 | def scalar_summary(self, tag, value, step): 70 | """Log a scalar variable.""" 71 | if USE_TENSORBOARD: 72 | self.writer.add_scalar(tag, value, step) 73 | -------------------------------------------------------------------------------- /src/lib/models/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | 8 | from .scatter_gather import scatter_kwargs 9 | 10 | class _DataParallel(Module): 11 | r"""Implements data parallelism at the module level. 12 | 13 | This container parallelizes the application of the given module by 14 | splitting the input across the specified devices by chunking in the batch 15 | dimension. In the forward pass, the module is replicated on each device, 16 | and each replica handles a portion of the input. During the backwards 17 | pass, gradients from each replica are summed into the original module. 18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 
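Unlike the stock torch.nn.DataParallel, this variant also accepts a
``chunk_sizes`` list giving the per-GPU batch split, so the first GPU can
be assigned a smaller chunk. A sketch of the intended use (the sizes are
illustrative, one entry per device; the module-level DataParallel factory
below falls back to torch.nn.DataParallel when chunk_sizes is None or
uniform):

>>> net = _DataParallel(model, device_ids=[0, 1, 2], chunk_sizes=[4, 10, 10])
>>> output = net(input_var) # GPU 0 gets 4 samples, GPUs 1 and 2 get 10 each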
30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output. Use -1 to indicate the CPU.
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /src/lib/models/decode.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import numpy as np 8 | from .utils import _gather_feat, _tranpose_and_gather_feat 9 | from utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 10 | 11 | 12 | def _nms(heat, kernel=3): 13 | pad = (kernel - 1) // 2 14 | 15 | hmax = nn.functional.max_pool2d( 16 | heat, (kernel, kernel), stride=1, padding=pad) 17 | keep = (hmax == heat).float() 18 | return heat * keep 19 | 20 | 21 | def _left_aggregate(heat): 22 | ''' 23 | heat: batchsize x channels x h x w 24 | ''' 25 | shape = heat.shape 26 | heat = heat.reshape(-1, heat.shape[3]) 27 | heat = heat.transpose(1, 0).contiguous() 28 | ret = heat.clone() 29 | for i in range(1, heat.shape[0]): 30 | inds = (heat[i] >= heat[i - 1]) 31 | ret[i] += ret[i - 1] * inds.float() 32 | return (ret - heat).transpose(1, 0).reshape(shape) 33 | 34 | 35 | def _right_aggregate(heat): 36 | ''' 37 | heat: batchsize x channels x h x w 38 | ''' 39 | shape = heat.shape 40 | heat = heat.reshape(-1, heat.shape[3]) 41 | heat = heat.transpose(1, 0).contiguous() 42 | ret = heat.clone() 43 | for i in range(heat.shape[0] - 2, -1, -1): 44 | inds = (heat[i] >= heat[i +1]) 45 | ret[i] += ret[i + 1] * inds.float() 46 | return (ret - heat).transpose(1, 0).reshape(shape) 47 | 48 | 49 | def _top_aggregate(heat): 50 | ''' 51 | heat: batchsize x channels x h x w 52 | ''' 53 | heat = heat.transpose(3, 2) 54 | shape = heat.shape 55 | heat = heat.reshape(-1, heat.shape[3]) 56 | heat = heat.transpose(1, 0).contiguous() 57 | ret = heat.clone() 58 | for i in range(1, heat.shape[0]): 59 | inds = (heat[i] >= heat[i - 1]) 60 | ret[i] += ret[i - 1] * inds.float() 61 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2) 62 | 63 | 64 | def _bottom_aggregate(heat): 65 | ''' 66 | heat: batchsize x channels x h x w 67 | ''' 68 | heat = heat.transpose(3, 2) 69 | shape = heat.shape 70 | heat = heat.reshape(-1, heat.shape[3]) 71 | 
heat = heat.transpose(1, 0).contiguous() 72 | ret = heat.clone() 73 | for i in range(heat.shape[0] - 2, -1, -1): 74 | inds = (heat[i] >= heat[i + 1]) 75 | ret[i] += ret[i + 1] * inds.float() 76 | return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2) 77 | 78 | 79 | def _h_aggregate(heat, aggr_weight=0.1): 80 | return aggr_weight * _left_aggregate(heat) + \ 81 | aggr_weight * _right_aggregate(heat) + heat 82 | 83 | 84 | def _v_aggregate(heat, aggr_weight=0.1): 85 | return aggr_weight * _top_aggregate(heat) + \ 86 | aggr_weight * _bottom_aggregate(heat) + heat 87 | ''' 88 | # Slow for large number of categories 89 | def _topk(scores, K=40): 90 | batch, cat, height, width = scores.size() 91 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 92 | 93 | topk_clses = (topk_inds / (height * width)).int() 94 | 95 | topk_inds = topk_inds % (height * width) 96 | topk_ys = (topk_inds / width).int().float() 97 | topk_xs = (topk_inds % width).int().float() 98 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 99 | ''' 100 | 101 | 102 | def _topk_channel(scores, K=40): 103 | batch, cat, height, width = scores.size() 104 | 105 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 106 | 107 | topk_inds = topk_inds % (height * width) 108 | topk_ys = (topk_inds / width).int().float() 109 | topk_xs = (topk_inds % width).int().float() 110 | 111 | return topk_scores, topk_inds, topk_ys, topk_xs 112 | 113 | 114 | def _topk(scores, K=40): 115 | batch, cat, height, width = scores.size() 116 | 117 | topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K) 118 | 119 | topk_inds = topk_inds % (height * width) 120 | topk_ys = (topk_inds / width).int().float() 121 | topk_xs = (topk_inds % width).int().float() 122 | 123 | topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K) 124 | topk_clses = (topk_ind / K).int() 125 | topk_inds = _gather_feat( 126 | topk_inds.view(batch, -1, 1), topk_ind).view(batch, K) 127 | topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K) 128 | topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K) 129 | 130 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 131 | 132 | 133 | def ctdet_decode(hm_act, wh_act, reg_act=None, K=100): 134 | batch, cat, height, width = hm_act.size() 135 | 136 | hm_act = _nms(hm_act, kernel=3) # need to adjust the kernel size. 
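# (added commentary) _nms above keeps only local maxima of the interaction-
# point heatmap: a 3x3 max-pool is compared element-wise with the input and
# all non-peak positions are zeroed, so the top-K selection below picks
# distinct peaks rather than near-duplicate neighbours; this stands in for
# a box-level NMS on point detections.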
137 | 138 | scores_act, inds_act, clses_act, ys_act, xs_act = _topk(hm_act, K=K) 139 | 140 | if reg_act is not None: 141 | reg_act = _tranpose_and_gather_feat(reg_act, inds_act) 142 | reg_act = reg_act.view(batch, K, 2) 143 | xs_act = xs_act.view(batch, K, 1) + reg_act[:, :, 0:1] 144 | ys_act = ys_act.view(batch, K, 1) + reg_act[:, :, 1:2] 145 | else: 146 | xs_act = xs_act.view(batch, K, 1) + 0.5 147 | ys_act = ys_act.view(batch, K, 1) + 0.5 148 | 149 | wh_act = _tranpose_and_gather_feat(wh_act, inds_act) 150 | 151 | wh_act = wh_act.view(batch, K, 2) 152 | 153 | clses_act = clses_act.view(batch, K, 1).float() 154 | scores_act = scores_act.view(batch, K, 1) 155 | 156 | bboxes = torch.cat([xs_act - wh_act[..., 0:1] / 2, 157 | ys_act - wh_act[..., 1:2] / 2, 158 | xs_act + wh_act[..., 0:1] / 2, 159 | ys_act + wh_act[..., 1:2] / 2], dim=2) 160 | 161 | detections_act = torch.cat([xs_act, ys_act, bboxes, scores_act, clses_act], dim=2) 162 | 163 | # print(detections_act.shape) 164 | # print(detections_act) 165 | 166 | return detections_act 167 | -------------------------------------------------------------------------------- /src/lib/models/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from .utils import _tranpose_and_gather_feat 14 | import torch.nn.functional as F 15 | 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1) 20 | neg_inds = gt.lt(1) 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | 42 | def _neg_loss(pred, gt): 43 | ''' Modified focal loss. Exactly the same as CornerNet. 
44 | Runs faster and costs a little bit more memory
45 | Arguments:
46 | pred (batch x c x h x w)
47 | gt (batch x c x h x w)
48 | '''
49 | pos_inds = gt.eq(1).float()
50 | neg_inds = gt.lt(1).float()
51 |
52 | neg_weights = torch.pow(1 - gt, 4)
53 |
54 | loss = 0
55 |
56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
58 |
59 | num_pos = pos_inds.float().sum()
60 | pos_loss = pos_loss.sum()
61 | neg_loss = neg_loss.sum()
62 |
63 | if num_pos == 0:
64 | loss = loss - neg_loss
65 | else:
66 | loss = loss - (pos_loss + neg_loss) / num_pos
67 | return loss
68 |
69 | def _not_faster_neg_loss(pred, gt):
70 | pos_inds = gt.eq(1).float()
71 | neg_inds = gt.lt(1).float()
72 | num_pos = pos_inds.float().sum()
73 | neg_weights = torch.pow(1 - gt, 4)
74 |
75 | loss = 0
76 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds
77 | weight = neg_weights * neg_inds + pos_inds
78 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
79 | all_loss = all_loss.sum()
80 |
81 | if num_pos > 0:
82 | all_loss /= num_pos
83 | loss -= all_loss
84 | return loss
85 |
86 | def _slow_reg_loss(regr, gt_regr, mask):
87 | num = mask.float().sum()
88 | mask = mask.unsqueeze(2).expand_as(gt_regr)
89 |
90 | regr = regr[mask]
91 | gt_regr = gt_regr[mask]
92 |
93 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
94 | regr_loss = regr_loss / (num + 1e-4)
95 | return regr_loss
96 |
97 | def _reg_loss(regr, gt_regr, mask):
98 | ''' L1 regression loss
99 | Arguments:
100 | regr (batch x max_objects x dim)
101 | gt_regr (batch x max_objects x dim)
102 | mask (batch x max_objects)
103 | '''
104 | num = mask.float().sum()
105 | mask = mask.unsqueeze(2).expand_as(gt_regr).float()
106 |
107 | regr = regr * mask
108 | gt_regr = gt_regr * mask
109 |
110 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
111 | regr_loss = regr_loss / (num + 1e-4)
112 | return regr_loss
113 |
114 | class FocalLoss(nn.Module):
115 | '''nn.Module wrapper for focal loss'''
116 | def __init__(self):
117 | super(FocalLoss, self).__init__()
118 | self.neg_loss = _neg_loss
119 |
120 | def forward(self, out, target):
121 | return self.neg_loss(out, target)
122 |
123 | class RegLoss(nn.Module):
124 | '''Regression loss for an output tensor
125 | Arguments:
126 | output (batch x dim x h x w)
127 | mask (batch x max_objects)
128 | ind (batch x max_objects)
129 | target (batch x max_objects x dim)
130 | '''
131 | def __init__(self):
132 | super(RegLoss, self).__init__()
133 |
134 | def forward(self, output, mask, ind, target):
135 | pred = _tranpose_and_gather_feat(output, ind)
136 | loss = _reg_loss(pred, target, mask)
137 | return loss
138 |
139 | class RegL1Loss(nn.Module):
140 | def __init__(self):
141 | super(RegL1Loss, self).__init__()
142 |
143 | def forward(self, output, mask, ind, target):
144 | pred = _tranpose_and_gather_feat(output, ind)
145 | mask = mask.unsqueeze(2).expand_as(pred).float()
146 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
147 | loss = F.l1_loss(pred * mask, target * mask, size_average=False)
148 | loss = loss / (mask.sum() + 1e-4)
149 | return loss
150 |
151 | class NormRegL1Loss(nn.Module):
152 | def __init__(self):
153 | super(NormRegL1Loss, self).__init__()
154 |
155 | def forward(self, output, mask, ind, target):
156 | pred = _tranpose_and_gather_feat(output, ind)
157 | mask = mask.unsqueeze(2).expand_as(pred).float()
158
| # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 159 | pred = pred / (target + 1e-4) 160 | target = target * 0 + 1 161 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 162 | loss = loss / (mask.sum() + 1e-4) 163 | return loss 164 | 165 | class RegWeightedL1Loss(nn.Module): 166 | def __init__(self): 167 | super(RegWeightedL1Loss, self).__init__() 168 | 169 | def forward(self, output, mask, ind, target): 170 | pred = _tranpose_and_gather_feat(output, ind) 171 | mask = mask.float() 172 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 173 | loss = F.l1_loss(pred * mask, target * mask, size_average=False) 174 | loss = loss / (mask.sum() + 1e-4) 175 | return loss 176 | 177 | class L1Loss(nn.Module): 178 | def __init__(self): 179 | super(L1Loss, self).__init__() 180 | 181 | def forward(self, output, mask, ind, target): 182 | pred = _tranpose_and_gather_feat(output, ind) 183 | mask = mask.unsqueeze(2).expand_as(pred).float() 184 | loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 185 | return loss 186 | 187 | class BinRotLoss(nn.Module): 188 | def __init__(self): 189 | super(BinRotLoss, self).__init__() 190 | 191 | def forward(self, output, mask, ind, rotbin, rotres): 192 | pred = _tranpose_and_gather_feat(output, ind) 193 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 194 | return loss 195 | 196 | def compute_res_loss(output, target): 197 | return F.smooth_l1_loss(output, target, reduction='elementwise_mean') 198 | 199 | # TODO: weight 200 | def compute_bin_loss(output, target, mask): 201 | mask = mask.expand_as(output) 202 | output = output * mask.float() 203 | return F.cross_entropy(output, target, reduction='elementwise_mean') 204 | 205 | def compute_rot_loss(output, target_bin, target_res, mask): 206 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 207 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 208 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 209 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 210 | # mask: (B, 128, 1) 211 | # import pdb; pdb.set_trace() 212 | output = output.view(-1, 8) 213 | target_bin = target_bin.view(-1, 2) 214 | target_res = target_res.view(-1, 2) 215 | mask = mask.view(-1, 1) 216 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 217 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 218 | loss_res = torch.zeros_like(loss_bin1) 219 | if target_bin[:, 0].nonzero().shape[0] > 0: 220 | idx1 = target_bin[:, 0].nonzero()[:, 0] 221 | valid_output1 = torch.index_select(output, 0, idx1.long()) 222 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 223 | loss_sin1 = compute_res_loss( 224 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 225 | loss_cos1 = compute_res_loss( 226 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 227 | loss_res += loss_sin1 + loss_cos1 228 | if target_bin[:, 1].nonzero().shape[0] > 0: 229 | idx2 = target_bin[:, 1].nonzero()[:, 0] 230 | valid_output2 = torch.index_select(output, 0, idx2.long()) 231 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 232 | loss_sin2 = compute_res_loss( 233 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 234 | loss_cos2 = compute_res_loss( 235 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 236 | loss_res += loss_sin2 + loss_cos2 237 | return loss_bin1 + loss_bin2 + loss_res 238 | -------------------------------------------------------------------------------- 
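The losses above plug into training as plain modules: `FocalLoss` on the sigmoid-activated heatmaps and `RegL1Loss` on values gathered at per-object indices. A minimal usage sketch (the shapes, dummy values, and import path are illustrative assumptions, not taken from this repository):

```python
import torch
from models.losses import FocalLoss, RegL1Loss  # assumed import path for this sketch

focal, reg_l1 = FocalLoss(), RegL1Loss()

# Heatmap head: predictions must already be sigmoid-activated and clamped inside (0, 1),
# otherwise torch.log() in _neg_loss produces -inf/nan.
pred_hm = torch.rand(2, 80, 128, 128).clamp(1e-4, 1 - 1e-4)
gt_hm = torch.zeros(2, 80, 128, 128)
gt_hm[0, 3, 64, 64] = 1.0  # one positive peak; Gaussian neighbours would lie in (0, 1)
hm_loss = focal(pred_hm, gt_hm)

# Regression head: values are gathered at flat spatial indices (y * W + x), one slot
# per potential object, with a mask marking the slots that hold real objects.
wh_map = torch.randn(2, 2, 128, 128)         # e.g. a width/height prediction map
ind = torch.zeros(2, 128, dtype=torch.long)  # flat indices per object slot
mask = torch.zeros(2, 128)
mask[0, 0] = 1.0                             # only the first slot is a real object
target = torch.zeros(2, 128, 2)
wh_loss = reg_l1(wh_map, mask, ind, target)
print(hm_loss.item(), wh_loss.item())
```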
/src/lib/models/model.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torchvision.models as models 6 | import torch 7 | import torch.nn as nn 8 | import os 9 | 10 | # from .networks.msra_resnet import get_pose_net 11 | # from .networks.dlav0 import get_pose_net as get_dlav0 12 | # from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn 13 | # from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn 14 | from .networks.large_hourglass import get_large_hourglass_net 15 | 16 | _model_factory = { 17 | # 'res': get_pose_net, # default Resnet with deconv 18 | # 'dlav0': get_dlav0, # default DLAup 19 | # 'dla': get_dla_dcn, 20 | # 'resdcn': get_pose_net_dcn, 21 | 'hourglass': get_large_hourglass_net, 22 | } 23 | 24 | 25 | def create_model(arch, heads, head_conv): 26 | num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0 27 | arch = arch[:arch.find('_')] if '_' in arch else arch 28 | get_model = _model_factory[arch] 29 | model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv) 30 | return model 31 | 32 | 33 | # def load_model(model, model_path, optimizer=None, resume=False, 34 | # lr=None, lr_step=None): 35 | # start_epoch = 0 36 | # checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 37 | # print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 38 | # state_dict_ = checkpoint['state_dict'] 39 | # state_dict = {} 40 | # 41 | # # convert data_parallal to model 42 | # for k in state_dict_: 43 | # if k.startswith('module') and not k.startswith('module_list'): 44 | # state_dict[k[7:]] = state_dict_[k] 45 | # else: 46 | # state_dict[k] = state_dict_[k] 47 | # model_state_dict = model.state_dict() 48 | # 49 | # # check loaded parameters and created model parameters 50 | # for k in state_dict: 51 | # if k in model_state_dict: 52 | # if state_dict[k].shape != model_state_dict[k].shape: 53 | # print('Skip loading parameter {}, required shape{}, '\ 54 | # 'loaded shape{}.'.format( 55 | # k, model_state_dict[k].shape, state_dict[k].shape)) 56 | # state_dict[k] = model_state_dict[k] 57 | # else: 58 | # print('Drop parameter {}.'.format(k)) 59 | # for k in model_state_dict: 60 | # if not (k in state_dict): 61 | # print('No param {}.'.format(k)) 62 | # state_dict[k] = model_state_dict[k] 63 | # model.load_state_dict(state_dict, strict=False) 64 | # 65 | # # resume optimizer parameters 66 | # if optimizer is not None and resume: 67 | # if 'optimizer' in checkpoint: 68 | # optimizer.load_state_dict(checkpoint['optimizer']) 69 | # start_epoch = checkpoint['epoch'] 70 | # start_lr = lr 71 | # for step in lr_step: 72 | # if start_epoch >= step: 73 | # start_lr *= 0.1 74 | # for param_group in optimizer.param_groups: 75 | # param_group['lr'] = start_lr 76 | # print('Resumed optimizer with start lr', start_lr) 77 | # else: 78 | # print('No optimizer parameters in checkpoint.') 79 | # if optimizer is not None: 80 | # return model, optimizer, start_epoch 81 | # else: 82 | # return model 83 | 84 | def load_model(model, model_path): 85 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 86 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 87 | state_dict_ = checkpoint['state_dict'] 88 | state_dict = {} 89 | 90 | # convert data_parallal to model 91 | for k in state_dict_: 92 | if k.startswith('module') and not 
k.startswith('module_list'): 93 | state_dict[k[7:]] = state_dict_[k] 94 | else: 95 | state_dict[k] = state_dict_[k] 96 | model_state_dict = model.state_dict() 97 | 98 | # check loaded parameters and created model parameters 99 | for k in state_dict: 100 | if k in model_state_dict: 101 | if state_dict[k].shape != model_state_dict[k].shape: 102 | print('Skip loading parameter {}, required shape{}, '\ 103 | 'loaded shape{}.'.format( 104 | k, model_state_dict[k].shape, state_dict[k].shape)) 105 | state_dict[k] = model_state_dict[k] 106 | else: 107 | print('Drop parameter {}.'.format(k)) 108 | for k in model_state_dict: 109 | if not (k in state_dict): 110 | print('No param {}.'.format(k)) 111 | state_dict[k] = model_state_dict[k] 112 | model.load_state_dict(state_dict, strict=False) 113 | 114 | return model 115 | 116 | 117 | def save_model(path, epoch, model, optimizer=None): 118 | if isinstance(model, torch.nn.DataParallel): 119 | state_dict = model.module.state_dict() 120 | else: 121 | state_dict = model.state_dict() 122 | data = {'epoch': epoch, 123 | 'state_dict': state_dict} 124 | if not (optimizer is None): 125 | data['optimizer'] = optimizer.state_dict() 126 | torch.save(data, path) 127 | 128 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with PyTorch
2 |
3 | ### Build
4 | ```bash
5 | ./make.sh # build
6 | python test.py # run examples and gradient check
7 | ```
8 |
9 | ### An Example
10 | - deformable conv
11 | ```python
12 | from dcn_v2 import DCN
13 | input = torch.randn(2, 64, 128, 128).cuda()
14 | # wrap all things (offset and mask) in DCN
15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
16 | output = dcn(input)
17 | print(output.shape)
18 | ```
19 | - deformable roi pooling
20 | ```python
21 | from dcn_v2 import DCNPooling
22 | input = torch.randn(2, 32, 64, 64).cuda()
23 | batch_inds = torch.randint(2, (20, 1)).cuda().float()
24 | x = torch.randint(256, (20, 1)).cuda().float()
25 | y = torch.randint(256, (20, 1)).cuda().float()
26 | w = torch.randint(64, (20, 1)).cuda().float()
27 | h = torch.randint(64, (20, 1)).cuda().float()
28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
29 |
30 | # modulated deformable pooling (V2)
31 | # wrap all things (offset and mask) in DCNPooling
32 | dpooling = DCNPooling(spatial_scale=1.0 / 4,
33 | pooled_size=7,
34 | output_dim=32,
35 | no_trans=False,
36 | group_size=1,
37 | trans_std=0.1).cuda()
38 |
39 | dout = dpooling(input, rois)
40 | ```
41 |
42 | ### Known Issues:
43 |
44 | - [x] Gradient check w.r.t. offset (solved)
45 | - [ ] Backward is not reentrant (minor)
46 |
47 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
48 |
49 | I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
50 | However, when I set the offset to 0.5, it passes. I am still wondering what causes this problem. Could it be due to some
51 | non-differentiable points?
52 |
53 | Update: all gradient checks pass with double precision.
54 |
55 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for
56 | float, `<1e-15` for double),
57 | so it may not be a serious problem.
58 |
59 | Please post an issue or PR if you have any comments.
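### Reproducing the double-precision check

A sketch following `check_gradient_dconv_double` in this repo's `test.py` (it assumes the
double backend was built with `build_double.py` and that `dcn_v2_func.py` imports
`_ext.dcn_v2_double` instead of `_ext.dcn_v2`; CUDA is required):

```python
import torch
from torch.autograd import gradcheck
from dcn_v2_func import DCNv2Function

deformable_groups = 1
N, inC, inH, inW = 2, 2, 4, 4
outC, kH, kW = 2, 3, 3

# double precision keeps gradcheck's finite differences numerically reliable
input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kH * kW, inH, inW, dtype=torch.float64).cuda()
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * kH * kW, inH, inW, dtype=torch.float64).cuda()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda()
weight.requires_grad = True
bias = torch.rand(outC, dtype=torch.float64).cuda()
bias.requires_grad = True

func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3))
```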
60 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/models/networks/DCNv2/__init__.py -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/dcn_v2.c'] 8 | headers = ['src/dcn_v2.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | extra_objects = [] 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/dcn_v2_cuda.c'] 16 | headers += ['src/dcn_v2_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda.cu.o'] 19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda.cu.o'] 20 | with_cuda = True 21 | else: 22 | raise ValueError('CUDA is not available') 23 | 24 | extra_compile_args = ['-fopenmp', '-std=c99'] 25 | 26 | this_file = os.path.dirname(os.path.realpath(__file__)) 27 | print(this_file) 28 | sources = [os.path.join(this_file, fname) for fname in sources] 29 | headers = [os.path.join(this_file, fname) for fname in headers] 30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 31 | 32 | 33 | # ffi = BuildExtension( 34 | ffi = create_extension( 35 | '_ext.dcn_v2', 36 | headers=headers, 37 | sources=sources, 38 | define_macros=defines, 39 | relative_to=__file__, 40 | with_cuda=with_cuda, 41 | extra_objects=extra_objects, 42 | extra_compile_args=extra_compile_args 43 | ) 44 | 45 | if __name__ == '__main__': 46 | ffi.build() 47 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/build_double.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | # from torch.utils.cpp_extension import BuildExtension 5 | 6 | 7 | sources = ['src/dcn_v2_double.c'] 8 | headers = ['src/dcn_v2_double.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | extra_objects = [] 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/dcn_v2_cuda_double.c'] 16 | headers += ['src/dcn_v2_cuda_double.h'] 17 | defines += [('WITH_CUDA', None)] 18 | extra_objects += ['src/cuda/dcn_v2_im2col_cuda_double.cu.o'] 19 | extra_objects += ['src/cuda/dcn_v2_psroi_pooling_cuda_double.cu.o'] 20 | with_cuda = True 21 | else: 22 | raise ValueError('CUDA is not available') 23 | 24 | extra_compile_args = ['-fopenmp', '-std=c99'] 25 | 26 | this_file = os.path.dirname(os.path.realpath(__file__)) 27 | print(this_file) 28 | sources = [os.path.join(this_file, fname) for fname in sources] 29 | headers = [os.path.join(this_file, fname) for fname in headers] 30 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 31 | 32 | # ffi = BuildExtension( 33 | ffi = create_extension( 34 | '_ext.dcn_v2_double', 35 | headers=headers, 36 | sources=sources, 37 | define_macros=defines, 38 | relative_to=__file__, 39 | with_cuda=with_cuda, 40 | extra_objects=extra_objects, 41 | extra_compile_args=extra_compile_args 42 | ) 43 | 44 | if __name__ == '__main__': 45 | 
ffi.build() 46 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/dcn_v2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import torch 7 | import math 8 | from torch import nn 9 | from torch.nn.modules.utils import _pair 10 | 11 | from .dcn_v2_func import DCNv2Function 12 | from .dcn_v2_func import DCNv2PoolingFunction 13 | 14 | class DCNv2(nn.Module): 15 | 16 | def __init__(self, in_channels, out_channels, 17 | kernel_size, stride, padding, dilation=1, deformable_groups=1): 18 | super(DCNv2, self).__init__() 19 | self.in_channels = in_channels 20 | self.out_channels = out_channels 21 | self.kernel_size = _pair(kernel_size) 22 | self.stride = stride 23 | self.padding = padding 24 | self.dilation = dilation 25 | self.deformable_groups = deformable_groups 26 | 27 | self.weight = nn.Parameter(torch.Tensor(out_channels, in_channels, *self.kernel_size)) 28 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 29 | self.reset_parameters() 30 | 31 | def reset_parameters(self): 32 | n = self.in_channels 33 | for k in self.kernel_size: 34 | n *= k 35 | stdv = 1. / math.sqrt(n) 36 | self.weight.data.uniform_(-stdv, stdv) 37 | self.bias.data.zero_() 38 | 39 | def forward(self, input, offset, mask): 40 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups) 41 | return func(input, offset, mask, self.weight, self.bias) 42 | 43 | 44 | class DCN(DCNv2): 45 | 46 | def __init__(self, in_channels, out_channels, 47 | kernel_size, stride, padding, 48 | dilation=1, deformable_groups=1): 49 | super(DCN, self).__init__(in_channels, out_channels, 50 | kernel_size, stride, padding, dilation, deformable_groups) 51 | 52 | self.conv_offset_mask = nn.Conv2d(self.in_channels, 53 | self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1], 54 | kernel_size=self.kernel_size, 55 | stride=(self.stride, self.stride), 56 | padding=(self.padding, self.padding), 57 | bias=True) 58 | self.init_offset() 59 | 60 | def init_offset(self): 61 | self.conv_offset_mask.weight.data.zero_() 62 | self.conv_offset_mask.bias.data.zero_() 63 | 64 | def forward(self, input): 65 | out = self.conv_offset_mask(input) 66 | o1, o2, mask = torch.chunk(out, 3, dim=1) 67 | offset = torch.cat((o1, o2), dim=1) 68 | mask = torch.sigmoid(mask) 69 | func = DCNv2Function(self.stride, self.padding, self.dilation, self.deformable_groups) 70 | return func(input, offset, mask, self.weight, self.bias) 71 | 72 | 73 | class DCNv2Pooling(nn.Module): 74 | 75 | def __init__(self, 76 | spatial_scale, 77 | pooled_size, 78 | output_dim, 79 | no_trans, 80 | group_size=1, 81 | part_size=None, 82 | sample_per_part=4, 83 | trans_std=.0): 84 | super(DCNv2Pooling, self).__init__() 85 | self.spatial_scale = spatial_scale 86 | self.pooled_size = pooled_size 87 | self.output_dim = output_dim 88 | self.no_trans = no_trans 89 | self.group_size = group_size 90 | self.part_size = pooled_size if part_size is None else part_size 91 | self.sample_per_part = sample_per_part 92 | self.trans_std = trans_std 93 | self.func = DCNv2PoolingFunction(self.spatial_scale, 94 | self.pooled_size, 95 | self.output_dim, 96 | self.no_trans, 97 | self.group_size, 98 | self.part_size, 99 | self.sample_per_part, 100 | self.trans_std) 101 | 102 | def forward(self, data, rois, offset): 103 | 104 | if 
self.no_trans: 105 | offset = data.new() 106 | return self.func(data, rois, offset) 107 | 108 | class DCNPooling(DCNv2Pooling): 109 | 110 | def __init__(self, 111 | spatial_scale, 112 | pooled_size, 113 | output_dim, 114 | no_trans, 115 | group_size=1, 116 | part_size=None, 117 | sample_per_part=4, 118 | trans_std=.0, 119 | deform_fc_dim=1024): 120 | super(DCNPooling, self).__init__(spatial_scale, 121 | pooled_size, 122 | output_dim, 123 | no_trans, 124 | group_size, 125 | part_size, 126 | sample_per_part, 127 | trans_std) 128 | 129 | self.deform_fc_dim = deform_fc_dim 130 | 131 | if not no_trans: 132 | self.func_offset = DCNv2PoolingFunction(self.spatial_scale, 133 | self.pooled_size, 134 | self.output_dim, 135 | True, 136 | self.group_size, 137 | self.part_size, 138 | self.sample_per_part, 139 | self.trans_std) 140 | self.offset_fc = nn.Sequential( 141 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), 142 | nn.ReLU(inplace=True), 143 | nn.Linear(self.deform_fc_dim, self.deform_fc_dim), 144 | nn.ReLU(inplace=True), 145 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 2) 146 | ) 147 | self.offset_fc[4].weight.data.zero_() 148 | self.offset_fc[4].bias.data.zero_() 149 | self.mask_fc = nn.Sequential( 150 | nn.Linear(self.pooled_size * self.pooled_size * self.output_dim, self.deform_fc_dim), 151 | nn.ReLU(inplace=True), 152 | nn.Linear(self.deform_fc_dim, self.pooled_size * self.pooled_size * 1), 153 | nn.Sigmoid() 154 | ) 155 | self.mask_fc[2].weight.data.zero_() 156 | self.mask_fc[2].bias.data.zero_() 157 | 158 | def forward(self, data, rois): 159 | if self.no_trans: 160 | offset = data.new() 161 | else: 162 | n = rois.shape[0] 163 | offset = data.new() 164 | x = self.func_offset(data, rois, offset) 165 | offset = self.offset_fc(x.view(n, -1)) 166 | offset = offset.view(n, 2, self.pooled_size, self.pooled_size) 167 | mask = self.mask_fc(x.view(n, -1)) 168 | mask = mask.view(n, 1, self.pooled_size, self.pooled_size) 169 | feat = self.func(data, rois, offset) * mask 170 | return feat 171 | return self.func(data, rois, offset) 172 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/dcn_v2_func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import torch 7 | from torch.autograd import Function 8 | 9 | from ._ext import dcn_v2 as _backend 10 | # from _ext import dcn_v2_double as _backend 11 | 12 | 13 | class DCNv2Function(Function): 14 | 15 | def __init__(self, stride, padding, dilation=1, deformable_groups=1): 16 | super(DCNv2Function, self).__init__() 17 | self.stride = stride 18 | self.padding = padding 19 | self.dilation = dilation 20 | self.deformable_groups = deformable_groups 21 | 22 | def forward(self, input, offset, mask, weight, bias): 23 | if not input.is_cuda: 24 | raise NotImplementedError 25 | if weight.requires_grad or mask.requires_grad or offset.requires_grad or input.requires_grad: 26 | self.save_for_backward(input, offset, mask, weight, bias) 27 | output = input.new(*self._infer_shape(input, weight)) 28 | self._bufs = [input.new(), input.new()] 29 | _backend.dcn_v2_cuda_forward(input, weight, 30 | bias, self._bufs[0], 31 | offset, mask, 32 | output, self._bufs[1], 33 | weight.shape[2], weight.shape[3], 34 | self.stride, self.stride, 35 | self.padding, self.padding, 36 | 
self.dilation, self.dilation, 37 | self.deformable_groups) 38 | return output 39 | 40 | def backward(self, grad_output): 41 | if not grad_output.is_cuda: 42 | raise NotImplementedError 43 | input, offset, mask, weight, bias = self.saved_tensors 44 | grad_input = input.new(*input.size()).zero_() 45 | grad_offset = offset.new(*offset.size()).zero_() 46 | grad_mask = mask.new(*mask.size()).zero_() 47 | grad_weight = weight.new(*weight.size()).zero_() 48 | grad_bias = bias.new(*bias.size()).zero_() 49 | _backend.dcn_v2_cuda_backward(input, weight, 50 | bias, self._bufs[0], 51 | offset, mask, 52 | self._bufs[1], 53 | grad_input, grad_weight, 54 | grad_bias, grad_offset, 55 | grad_mask, grad_output, 56 | weight.shape[2], weight.shape[3], 57 | self.stride, self.stride, 58 | self.padding, self.padding, 59 | self.dilation, self.dilation, 60 | self.deformable_groups) 61 | 62 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias 63 | 64 | def _infer_shape(self, input, weight): 65 | n = input.size(0) 66 | channels_out = weight.size(0) 67 | height, width = input.shape[2:4] 68 | kernel_h, kernel_w = weight.shape[2:4] 69 | height_out = (height + 2 * self.padding - 70 | (self.dilation * (kernel_h - 1) + 1)) // self.stride + 1 71 | width_out = (width + 2 * self.padding - (self.dilation * 72 | (kernel_w - 1) + 1)) // self.stride + 1 73 | return (n, channels_out, height_out, width_out) 74 | 75 | 76 | class DCNv2PoolingFunction(Function): 77 | 78 | def __init__(self, 79 | spatial_scale, 80 | pooled_size, 81 | output_dim, 82 | no_trans, 83 | group_size=1, 84 | part_size=None, 85 | sample_per_part=4, 86 | trans_std=.0): 87 | super(DCNv2PoolingFunction, self).__init__() 88 | self.spatial_scale = spatial_scale 89 | self.pooled_size = pooled_size 90 | self.output_dim = output_dim 91 | self.no_trans = no_trans 92 | self.group_size = group_size 93 | self.part_size = pooled_size if part_size is None else part_size 94 | self.sample_per_part = sample_per_part 95 | self.trans_std = trans_std 96 | 97 | assert self.trans_std >= 0.0 and self.trans_std <= 1.0 98 | 99 | def forward(self, data, rois, offset): 100 | if not data.is_cuda: 101 | raise NotImplementedError 102 | 103 | output = data.new(*self._infer_shape(data, rois)) 104 | output_count = data.new(*self._infer_shape(data, rois)) 105 | _backend.dcn_v2_psroi_pooling_cuda_forward(data, rois, offset, 106 | output, output_count, 107 | self.no_trans, self.spatial_scale, 108 | self.output_dim, self.group_size, 109 | self.pooled_size, self.part_size, 110 | self.sample_per_part, self.trans_std) 111 | 112 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 113 | self.save_for_backward(data, rois, offset, output_count) 114 | 115 | return output 116 | 117 | def backward(self, grad_output): 118 | if not grad_output.is_cuda: 119 | raise NotImplementedError 120 | 121 | data, rois, offset, output_count = self.saved_tensors 122 | grad_input = data.new(*data.size()).zero_() 123 | grad_offset = offset.new(*offset.size()).zero_() 124 | 125 | _backend.dcn_v2_psroi_pooling_cuda_backward(grad_output, 126 | data, 127 | rois, 128 | offset, 129 | output_count, 130 | grad_input, 131 | grad_offset, 132 | self.no_trans, 133 | self.spatial_scale, 134 | self.output_dim, 135 | self.group_size, 136 | self.pooled_size, 137 | self.part_size, 138 | self.sample_per_part, 139 | self.trans_std) 140 | return grad_input, None, grad_offset 141 | 142 | def _infer_shape(self, data, rois): 143 | # _, c, h, w = data.shape[:4] 144 | c = data.shape[1] 145 | n = rois.shape[0] 146 | 
return (n, self.output_dim, self.pooled_size, self.pooled_size) 147 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd src/cuda 3 | 4 | # compile dcn 5 | nvcc -c -o dcn_v2_im2col_cuda.cu.o dcn_v2_im2col_cuda.cu -x cu -Xcompiler -fPIC 6 | nvcc -c -o dcn_v2_im2col_cuda_double.cu.o dcn_v2_im2col_cuda_double.cu -x cu -Xcompiler -fPIC 7 | 8 | # compile dcn-roi-pooling 9 | nvcc -c -o dcn_v2_psroi_pooling_cuda.cu.o dcn_v2_psroi_pooling_cuda.cu -x cu -Xcompiler -fPIC 10 | nvcc -c -o dcn_v2_psroi_pooling_cuda_double.cu.o dcn_v2_psroi_pooling_cuda_double.cu -x cu -Xcompiler -fPIC 11 | 12 | cd - 13 | python build.py 14 | python build_double.py 15 | -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 
48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA 64 | #define DCN_V2_IM2COL_CUDA 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const float *data_im, const float *data_offset, const float *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, float *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const float *data_col, const float *data_offset, const float *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, float *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | float *grad_offset, float *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_im2col_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 3 | * 4 | * COPYRIGHT 5 | * 6 | * All contributions by the University of California: 7 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 8 | * All rights reserved. 9 | * 10 | * All other contributions: 11 | * Copyright (c) 2014-2017, the respective contributors 12 | * All rights reserved. 13 | * 14 | * Caffe uses a shared copyright model: each contributor holds copyright over 15 | * their contributions to Caffe. The project versioning records all such 16 | * contribution and copyright details. If a contributor wants to further mark 17 | * their specific copyright on a particular contribution, they should indicate 18 | * their copyright solely in the commit message of the change when it is 19 | * committed. 
20 | * 21 | * LICENSE 22 | * 23 | * Redistribution and use in source and binary forms, with or without 24 | * modification, are permitted provided that the following conditions are met: 25 | * 26 | * 1. Redistributions of source code must retain the above copyright notice, this 27 | * list of conditions and the following disclaimer. 28 | * 2. Redistributions in binary form must reproduce the above copyright notice, 29 | * this list of conditions and the following disclaimer in the documentation 30 | * and/or other materials provided with the distribution. 31 | * 32 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 33 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 35 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 36 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 37 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 38 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 39 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 40 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | * 43 | * CONTRIBUTION AGREEMENT 44 | * 45 | * By contributing to the BVLC/caffe repository through pull-request, comment, 46 | * or otherwise, the contributor releases their content to the 47 | * license and copyright terms herein. 48 | * 49 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 50 | * 51 | * Copyright (c) 2018 Microsoft 52 | * Licensed under The MIT License [see LICENSE for details] 53 | * \file modulated_deformable_im2col.h 54 | * \brief Function definitions of converting an image to 55 | * column matrix based on kernel, padding, dilation, and offset. 56 | * These functions are mainly used in deformable convolution operators. 
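 * (As a gloss on the declarations below: im2col unrolls each deformable, mask-modulated
 * sampling window into a column of a matrix so that the convolution reduces to a single
 * matrix multiply; the col2im variants scatter gradients back to the image, the offsets,
 * and the masks.)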
57 | * \ref: https://arxiv.org/abs/1811.11168 58 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 59 | */ 60 | 61 | /***************** Adapted by Charles Shang *********************/ 62 | 63 | #ifndef DCN_V2_IM2COL_CUDA_DOUBLE 64 | #define DCN_V2_IM2COL_CUDA_DOUBLE 65 | 66 | #ifdef __cplusplus 67 | extern "C" 68 | { 69 | #endif 70 | 71 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 72 | const double *data_im, const double *data_offset, const double *data_mask, 73 | const int batch_size, const int channels, const int height_im, const int width_im, 74 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 75 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 76 | const int dilation_h, const int dilation_w, 77 | const int deformable_group, double *data_col); 78 | 79 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 80 | const double *data_col, const double *data_offset, const double *data_mask, 81 | const int batch_size, const int channels, const int height_im, const int width_im, 82 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 83 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 84 | const int dilation_h, const int dilation_w, 85 | const int deformable_group, double *grad_im); 86 | 87 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 88 | const double *data_col, const double *data_im, const double *data_offset, const double *data_mask, 89 | const int batch_size, const int channels, const int height_im, const int width_im, 90 | const int height_col, const int width_col, const int kernel_h, const int kenerl_w, 91 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 92 | const int dilation_h, const int dilation_w, 93 | const int deformable_group, 94 | double *grad_offset, double *grad_mask); 95 | 96 | #ifdef __cplusplus 97 | } 98 | #endif 99 | 100 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA 11 | #define DCN_V2_PSROI_POOLING_CUDA 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const float *data, 20 | const float *bbox, 21 | const float *trans, 22 | float *out, 23 | float *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const float spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const float trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const float *out_grad, 41 | const float *data, 42 | const float *bbox, 43 | const float *trans, 44 | const float *top_count, 45 | float *in_grad, 46 | float *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/cuda/dcn_v2_psroi_pooling_cuda_double.h: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Copyright (c) 2017 Microsoft 3 | * Licensed under The MIT License [see LICENSE for details] 4 | * \file deformable_psroi_pooling.cu 5 | * \brief 6 | * \author Yi Li, Guodong Zhang, Jifeng Dai 7 | */ 8 | /***************** Adapted by Charles Shang *********************/ 9 | 10 | #ifndef DCN_V2_PSROI_POOLING_CUDA_DOUBLE 11 | #define DCN_V2_PSROI_POOLING_CUDA_DOUBLE 12 | 13 | #ifdef __cplusplus 14 | extern "C" 15 | { 16 | #endif 17 | 18 | void DeformablePSROIPoolForward(cudaStream_t stream, 19 | const double *data, 20 | const double *bbox, 21 | const double *trans, 22 | double *out, 23 | double *top_count, 24 | const int batch, 25 | const int channels, 26 | const int height, 27 | const int width, 28 | const int num_bbox, 29 | const int channels_trans, 30 | const int no_trans, 31 | const double spatial_scale, 32 | const int output_dim, 33 | const int group_size, 34 | const int pooled_size, 35 | const int part_size, 36 | const int sample_per_part, 37 | const double trans_std); 38 | 39 | void DeformablePSROIPoolBackwardAcc(cudaStream_t stream, 40 | const double *out_grad, 41 | const double *data, 42 | const double *bbox, 43 | const double *trans, 44 | const double *top_count, 45 | double *in_grad, 46 | double *trans_grad, 47 | const int batch, 48 | const int channels, 49 | const int height, 50 | const int width, 51 | const int num_bbox, 52 | const int channels_trans, 53 | const int no_trans, 54 | const double spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const double trans_std); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif 65 | 66 | #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 6 | THFloatTensor *bias, THFloatTensor *ones, 7 | THFloatTensor *offset, THFloatTensor *mask, 8 | THFloatTensor *output, THFloatTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 17 | THFloatTensor *bias, THFloatTensor *ones, 18 | THFloatTensor *offset, THFloatTensor *mask, 19 | THFloatTensor *output, THFloatTensor *columns, 20 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 21 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 22 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THFloatTensor *input, THFloatTensor *weight, 2 | THFloatTensor *bias, THFloatTensor *ones, 3 | THFloatTensor *offset, THFloatTensor *mask, 4 | THFloatTensor *output, THFloatTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | const int 
deformable_group); 9 | void dcn_v2_backward(THFloatTensor *input, THFloatTensor *weight, 10 | THFloatTensor *bias, THFloatTensor *ones, 11 | THFloatTensor *offset, THFloatTensor *mask, 12 | THFloatTensor *output, THFloatTensor *columns, 13 | THFloatTensor *grad_input, THFloatTensor *grad_weight, 14 | THFloatTensor *grad_bias, THFloatTensor *grad_offset, 15 | THFloatTensor *grad_mask, THFloatTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaTensor *input, THCudaTensor *weight, 10 | THCudaTensor *bias, THCudaTensor *ones, 11 | THCudaTensor *offset, THCudaTensor *mask, 12 | THCudaTensor *output, THCudaTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaTensor *input, THCudaTensor *weight, 19 | THCudaTensor *bias, THCudaTensor *ones, 20 | THCudaTensor *offset, THCudaTensor *mask, 21 | THCudaTensor *columns, 22 | THCudaTensor *grad_input, THCudaTensor *grad_weight, 23 | THCudaTensor *grad_bias, THCudaTensor *grad_offset, 24 | THCudaTensor *grad_mask, THCudaTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaTensor * input, THCudaTensor * bbox, 32 | THCudaTensor * trans, 33 | THCudaTensor * out, THCudaTensor * top_count, 34 | const int no_trans, 35 | const float spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const float trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaTensor * out_grad, 44 | THCudaTensor * input, THCudaTensor * bbox, 45 | THCudaTensor * trans, THCudaTensor * top_count, 46 | THCudaTensor * input_grad, THCudaTensor * trans_grad, 47 | const int no_trans, 48 | const float spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const float trans_std); 55 | 56 | // #ifdef __cplusplus 57 | // } 58 | // #endif 59 | 60 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_cuda_double.h: -------------------------------------------------------------------------------- 1 | // #ifndef DCN_V2_CUDA 2 | // #define DCN_V2_CUDA 3 | 4 | // #ifdef __cplusplus 5 | // extern "C" 6 | // { 7 | // #endif 8 | 9 | void dcn_v2_cuda_forward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 10 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 11 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 12 | THCudaDoubleTensor *output, THCudaDoubleTensor *columns, 13 | int kernel_h, int kernel_w, 14 | const int stride_h, const int stride_w, 15 | const int pad_h, const int pad_w, 16 | const int dilation_h, const int 
dilation_w, 17 | const int deformable_group); 18 | void dcn_v2_cuda_backward(THCudaDoubleTensor *input, THCudaDoubleTensor *weight, 19 | THCudaDoubleTensor *bias, THCudaDoubleTensor *ones, 20 | THCudaDoubleTensor *offset, THCudaDoubleTensor *mask, 21 | THCudaDoubleTensor *columns, 22 | THCudaDoubleTensor *grad_input, THCudaDoubleTensor *grad_weight, 23 | THCudaDoubleTensor *grad_bias, THCudaDoubleTensor *grad_offset, 24 | THCudaDoubleTensor *grad_mask, THCudaDoubleTensor *grad_output, 25 | int kernel_h, int kernel_w, 26 | int stride_h, int stride_w, 27 | int pad_h, int pad_w, 28 | int dilation_h, int dilation_w, 29 | int deformable_group); 30 | 31 | void dcn_v2_psroi_pooling_cuda_forward(THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 32 | THCudaDoubleTensor * trans, 33 | THCudaDoubleTensor * out, THCudaDoubleTensor * top_count, 34 | const int no_trans, 35 | const double spatial_scale, 36 | const int output_dim, 37 | const int group_size, 38 | const int pooled_size, 39 | const int part_size, 40 | const int sample_per_part, 41 | const double trans_std); 42 | 43 | void dcn_v2_psroi_pooling_cuda_backward(THCudaDoubleTensor * out_grad, 44 | THCudaDoubleTensor * input, THCudaDoubleTensor * bbox, 45 | THCudaDoubleTensor * trans, THCudaDoubleTensor * top_count, 46 | THCudaDoubleTensor * input_grad, THCudaDoubleTensor * trans_grad, 47 | const int no_trans, 48 | const double spatial_scale, 49 | const int output_dim, 50 | const int group_size, 51 | const int pooled_size, 52 | const int part_size, 53 | const int sample_per_part, 54 | const double trans_std); 55 | 56 | 57 | // #ifdef __cplusplus 58 | // } 59 | // #endif 60 | 61 | // #endif -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 6 | THDoubleTensor *bias, THDoubleTensor *ones, 7 | THDoubleTensor *offset, THDoubleTensor *mask, 8 | THDoubleTensor *output, THDoubleTensor *columns, 9 | const int pad_h, const int pad_w, 10 | const int stride_h, const int stride_w, 11 | const int dilation_h, const int dilation_w, 12 | const int deformable_group) 13 | { 14 | printf("only implemented in GPU"); 15 | } 16 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 17 | THDoubleTensor *bias, THDoubleTensor *ones, 18 | THDoubleTensor *offset, THDoubleTensor *mask, 19 | THDoubleTensor *output, THDoubleTensor *columns, 20 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 21 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 22 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 23 | int kernel_h, int kernel_w, 24 | int stride_h, int stride_w, 25 | int pad_h, int pad_w, 26 | int dilation_h, int dilation_w, 27 | int deformable_group) 28 | { 29 | printf("only implemented in GPU"); 30 | } -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/src/dcn_v2_double.h: -------------------------------------------------------------------------------- 1 | void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight, 2 | THDoubleTensor *bias, THDoubleTensor *ones, 3 | THDoubleTensor *offset, THDoubleTensor *mask, 4 | THDoubleTensor *output, THDoubleTensor *columns, 5 | const int pad_h, const int pad_w, 6 | const int stride_h, const int stride_w, 7 | const int dilation_h, const int dilation_w, 8 | 
const int deformable_group); 9 | void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight, 10 | THDoubleTensor *bias, THDoubleTensor *ones, 11 | THDoubleTensor *offset, THDoubleTensor *mask, 12 | THDoubleTensor *output, THDoubleTensor *columns, 13 | THDoubleTensor *grad_input, THDoubleTensor *grad_weight, 14 | THDoubleTensor *grad_bias, THDoubleTensor *grad_offset, 15 | THDoubleTensor *grad_mask, THDoubleTensor *grad_output, 16 | int kernel_h, int kernel_w, 17 | int stride_h, int stride_w, 18 | int pad_h, int pad_w, 19 | int dilation_h, int dilation_w, 20 | int deformable_group); -------------------------------------------------------------------------------- /src/lib/models/networks/DCNv2/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import DCNv2 12 | from dcn_v2_func import DCNv2Function 13 | from dcn_v2 import DCNv2Pooling 14 | from dcn_v2_func import DCNv2PoolingFunction 15 | 16 | deformable_groups = 1 17 | N, inC, inH, inW = 2, 2, 4, 4 18 | outC = 2 19 | kH, kW = 3, 3 20 | 21 | def conv_identify(weight, bias): 22 | weight.data.zero_() 23 | bias.data.zero_() 24 | o, i, h, w = weight.shape 25 | y = h//2 26 | x = w//2 27 | for p in range(i): 28 | for q in range(o): 29 | if p == q: 30 | weight.data[q, p, y, x] = 1.0 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | 67 | def check_gradient_dconv_double(): 68 | 69 | input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda() 70 | input.requires_grad = True 71 | 72 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW, dtype=torch.float64).cuda() 73 | # offset.data.zero_() 74 | # offset.data -= 0.00001 75 | offset.requires_grad = True 76 | 77 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW, dtype=torch.float64).cuda() 78 | # mask.data.zero_() 79 | mask.requires_grad = True 80 | mask = torch.sigmoid(mask) 81 | 82 | weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda() 83 | weight.requires_grad = True 84 | 85 | bias = torch.rand(outC, dtype=torch.float64).cuda() 86 | bias.requires_grad = True 87 | 88 | func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups) 89 | 90 | print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3)) 91 | 92 | def 
check_gradient_dconv(): 93 | 94 | input = torch.randn(N, inC, inH, inW).cuda() 95 | input.requires_grad = True 96 | 97 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() 98 | # offset.data.zero_() 99 | # offset.data -= 0.5 100 | offset.requires_grad = True 101 | 102 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 103 | # mask.data.zero_() 104 | mask.requires_grad = True 105 | mask = torch.sigmoid(mask) 106 | 107 | weight = torch.randn(outC, inC, kH, kW).cuda() 108 | weight.requires_grad = True 109 | 110 | bias = torch.rand(outC).cuda() 111 | bias.requires_grad = True 112 | 113 | func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups) 114 | 115 | print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-3, atol=1e-3, rtol=1e-2)) 116 | 117 | def check_pooling_zero_offset(): 118 | from dcn_v2 import DCNv2Pooling 119 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 120 | input[0, :, 16:26, 16:26] = 1. 121 | input[1, :, 10:20, 20:30] = 2. 122 | rois = torch.tensor([ 123 | [0, 65, 65, 103, 103], 124 | [1, 81, 41, 119, 79], 125 | ]).cuda().float() 126 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 127 | pooled_size=7, 128 | output_dim=16, 129 | no_trans=True, 130 | group_size=1, 131 | trans_std=0.1).cuda() 132 | 133 | out = pooling(input, rois, input.new()) 134 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 135 | print(s) 136 | 137 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 138 | pooled_size=7, 139 | output_dim=16, 140 | no_trans=False, 141 | group_size=1, 142 | trans_std=0.1).cuda() 143 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 144 | dout = dpooling(input, rois, offset) 145 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])]) 146 | print(s) 147 | 148 | def check_gradient_dpooling(): 149 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01 150 | N = 4 151 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 152 | x = torch.rand((N, 1)).cuda().float() * 15 153 | y = torch.rand((N, 1)).cuda().float() * 15 154 | w = torch.rand((N, 1)).cuda().float() * 10 155 | h = torch.rand((N, 1)).cuda().float() * 10 156 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 157 | offset = torch.randn(N, 2, 3, 3).cuda() 158 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 159 | pooled_size=3, 160 | output_dim=3, 161 | no_trans=False, 162 | group_size=1, 163 | trans_std=0.0).cuda() 164 | input.requires_grad = True 165 | offset.requires_grad = True 166 | print('check_gradient_dpooling', gradcheck(dpooling, (input, rois, offset), eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | from dcn_v2 import DCN 171 | input = torch.randn(2, 64, 128, 128).cuda() 172 | # wrap all things (offset and mask) in DCN 173 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 174 | output = dcn(input) 175 | target = output.new(*output.size()) 176 | target.data.uniform_(-0.01, 0.01) 177 | error = (target - output).mean() 178 | error.backward() 179 | print(output.shape) 180 | 181 | def example_dpooling(): 182 | from dcn_v2 import DCNv2Pooling 183 | input = torch.randn(2, 32, 64, 64).cuda() 184 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 185 | x = torch.randint(256, (20, 1)).cuda().float() 186 | y = torch.randint(256, (20, 1)).cuda().float() 187 | w = torch.randint(64, (20, 1)).cuda().float() 188 | h = torch.randint(64, (20, 1)).cuda().float() 189 | rois = torch.cat((batch_inds, x, y, x + w, y + h),
dim=1) 190 | offset = torch.randn(20, 2, 7, 7).cuda() 191 | input.requires_grad = True 192 | offset.requires_grad = True 193 | 194 | # normal roi_align 195 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 196 | pooled_size=7, 197 | output_dim=32, 198 | no_trans=True, 199 | group_size=1, 200 | trans_std=0.1).cuda() 201 | 202 | # deformable pooling 203 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 204 | pooled_size=7, 205 | output_dim=32, 206 | no_trans=False, 207 | group_size=1, 208 | trans_std=0.1).cuda() 209 | 210 | out = pooling(input, rois, offset) 211 | dout = dpooling(input, rois, offset) 212 | print(out.shape) 213 | print(dout.shape) 214 | 215 | target_out = out.new(*out.size()) 216 | target_out.data.uniform_(-0.01, 0.01) 217 | target_dout = dout.new(*dout.size()) 218 | target_dout.data.uniform_(-0.01, 0.01) 219 | e = (target_out - out).mean() 220 | e.backward() 221 | e = (target_dout - dout).mean() 222 | e.backward() 223 | 224 | def example_mdpooling(): 225 | from dcn_v2 import DCNPooling 226 | input = torch.randn(2, 32, 64, 64).cuda() 227 | input.requires_grad = True 228 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 229 | x = torch.randint(256, (20, 1)).cuda().float() 230 | y = torch.randint(256, (20, 1)).cuda().float() 231 | w = torch.randint(64, (20, 1)).cuda().float() 232 | h = torch.randint(64, (20, 1)).cuda().float() 233 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 234 | 235 | # modulated deformable pooling (V2) 236 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 237 | pooled_size=7, 238 | output_dim=32, 239 | no_trans=False, 240 | group_size=1, 241 | trans_std=0.1).cuda() 242 | 243 | dout = dpooling(input, rois) 244 | target = dout.new(*dout.size()) 245 | target.data.uniform_(-0.1, 0.1) 246 | error = (target - dout).mean() 247 | error.backward() 248 | print(dout.shape) 249 | 250 | if __name__ == '__main__': 251 | 252 | example_dconv() 253 | example_dpooling() 254 | example_mdpooling() 255 | 256 | check_pooling_zero_offset() 257 | # zero offset check 258 | if inC == outC: 259 | check_zero_offset() 260 | 261 | check_gradient_dpooling() 262 | 263 | # # gradient check 264 | # try: 265 | # check_gradient_dconv_double() 266 | # except TypeError: 267 | # print('''****** You can switch to double precision in dcn_v2_func.py by (un)commenting these two lines: 268 | # ****** from _ext import dcn_v2 as _backend 269 | # ****** from _ext import dcn_v2_double as _backend''') 270 | # print('****** Your tensor may not be **double** type') 271 | # print('****** Switching to **float** type') 272 | # 273 | # check_gradient_dconv() 274 | # finally: 275 | # print('****** Note: the "backward is not reentrant" error may not be a serious problem, ' 276 | # '****** since the max error is less than 1e-7\n' 277 | # '****** Still looking for what triggers this problem') -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from ._cpools import TopPool, BottomPool, LeftPool, RightPool 2 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | cpools.egg-info/ 3 | dist/ 4 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/__init__.py:
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | from torch.autograd import Function 5 | 6 | import top_pool, bottom_pool, left_pool, right_pool 7 | 8 | class TopPoolFunction(Function): 9 | @staticmethod 10 | def forward(ctx, input): 11 | output = top_pool.forward(input)[0] 12 | ctx.save_for_backward(input) 13 | return output 14 | 15 | @staticmethod 16 | def backward(ctx, grad_output): 17 | input = ctx.saved_variables[0] 18 | output = top_pool.backward(input, grad_output)[0] 19 | return output 20 | 21 | class BottomPoolFunction(Function): 22 | @staticmethod 23 | def forward(ctx, input): 24 | output = bottom_pool.forward(input)[0] 25 | ctx.save_for_backward(input) 26 | return output 27 | 28 | @staticmethod 29 | def backward(ctx, grad_output): 30 | input = ctx.saved_variables[0] 31 | output = bottom_pool.backward(input, grad_output)[0] 32 | return output 33 | 34 | class LeftPoolFunction(Function): 35 | @staticmethod 36 | def forward(ctx, input): 37 | output = left_pool.forward(input)[0] 38 | ctx.save_for_backward(input) 39 | return output 40 | 41 | @staticmethod 42 | def backward(ctx, grad_output): 43 | input = ctx.saved_variables[0] 44 | output = left_pool.backward(input, grad_output)[0] 45 | return output 46 | 47 | class RightPoolFunction(Function): 48 | @staticmethod 49 | def forward(ctx, input): 50 | output = right_pool.forward(input)[0] 51 | ctx.save_for_backward(input) 52 | return output 53 | 54 | @staticmethod 55 | def backward(ctx, grad_output): 56 | input = ctx.saved_variables[0] 57 | output = right_pool.backward(input, grad_output)[0] 58 | return output 59 | 60 | class TopPool(nn.Module): 61 | def forward(self, x): 62 | return TopPoolFunction.apply(x) 63 | 64 | class BottomPool(nn.Module): 65 | def forward(self, x): 66 | return BottomPoolFunction.apply(x) 67 | 68 | class LeftPool(nn.Module): 69 | def forward(self, x): 70 | return LeftPoolFunction.apply(x) 71 | 72 | class RightPool(nn.Module): 73 | def forward(self, x): 74 | return RightPoolFunction.apply(x) 75 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension 3 | 4 | setup( 5 | name="cpools", 6 | ext_modules=[ 7 | CppExtension("top_pool", ["src/top_pool.cpp"]), 8 | CppExtension("bottom_pool", ["src/bottom_pool.cpp"]), 9 | CppExtension("left_pool", ["src/left_pool.cpp"]), 10 | CppExtension("right_pool", ["src/right_pool.cpp"]) 11 | ], 12 | cmdclass={ 13 | "build_ext": BuildExtension 14 | } 15 | ) 16 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/bottom_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, ind, height); 18 | at::Tensor cur_temp = at::slice(output, 2, ind, height); 19 | at::Tensor next_temp = at::slice(output, 2, 0, height-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 |
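// The doubling stride above (ind <<= 1) computes a prefix max along the
// height axis: after ceil(log2(height)) passes each row of `output` holds
// the max over itself and every row above it, replacing an O(height)
// row-by-row scan with O(log height) slice-wide max operations.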
return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(2, 0); 48 | auto grad_output_temp = grad_output.select(2, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < height - 1; ++ind) { 55 | input_temp = input.select(2, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Bottom Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Bottom Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/left_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, 0, width-ind); 18 | at::Tensor cur_temp = at::slice(output, 3, 0, width-ind); 19 | at::Tensor next_temp = at::slice(output, 3, ind, width); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, width - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(width - 1); 46 | 47 | auto output_temp = output.select(3, width - 1); 48 | auto grad_output_temp = grad_output.select(3, width - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto
max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < width; ++ind) { 55 | input_temp = input.select(3, width - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, width - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Left Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Left Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/_cpools/src/right_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get width 12 | int64_t width = input.size(3); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < width; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 3, ind, width); 18 | at::Tensor cur_temp = at::slice(output, 3, ind, width); 19 | at::Tensor next_temp = at::slice(output, 3, 0, width-ind); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | at::Tensor output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(3, 0); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(0); 46 | 47 | auto output_temp = output.select(3, 0); 48 | auto grad_output_temp = grad_output.select(3, 0); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(3); 52 | auto gt_mask = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, height}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 0; ind < width - 1; ++ind) { 55 | input_temp = input.select(3, ind + 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, ind + 1); 61 | 62 | grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3); 63 | output.scatter_add_(3, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &pool_forward, "Right Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &pool_backward, "Right Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | --------------------------------------------------------------------------------
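All four pooling kernels share this same forward trick; only the axis and scan direction differ. As an illustrative cross-check (our own sketch, not a file in this repo; `bottom_pool_ref` is a hypothetical name), the `pool_forward` loop of `bottom_pool.cpp` can be reproduced in pure PyTorch and compared against `cummax`, which requires a newer PyTorch than the 1.0.1 these extensions target:

```Python
import torch

def bottom_pool_ref(x):
    # Mirrors bottom_pool.cpp's pool_forward: a prefix max down the height
    # axis (dim 2) built from doubling shifts, i.e. O(log H) passes.
    out = x.clone()
    height = out.size(2)
    ind = 1
    while ind < height:
        # clone() avoids read/write aliasing between the two overlapping slices
        out[:, :, ind:] = torch.max(out[:, :, ind:], out[:, :, :height - ind].clone())
        ind <<= 1
    return out

x = torch.randn(2, 3, 8, 8)
assert torch.equal(bottom_pool_ref(x), x.cummax(dim=2).values)
```

The backward kernels cannot use the doubling trick: they walk the axis one step at a time because each position's gradient must be routed back to the argmax location recorded in `max_ind`.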
/src/lib/models/networks/py_utils/_cpools/src/top_pool.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | 3 | #include <vector> 4 | 5 | std::vector<at::Tensor> top_pool_forward( 6 | at::Tensor input 7 | ) { 8 | // Initialize output 9 | at::Tensor output = at::zeros_like(input); 10 | 11 | // Get height 12 | int64_t height = input.size(2); 13 | 14 | output.copy_(input); 15 | 16 | for (int64_t ind = 1; ind < height; ind <<= 1) { 17 | at::Tensor max_temp = at::slice(output, 2, 0, height-ind); 18 | at::Tensor cur_temp = at::slice(output, 2, 0, height-ind); 19 | at::Tensor next_temp = at::slice(output, 2, ind, height); 20 | at::max_out(max_temp, cur_temp, next_temp); 21 | } 22 | 23 | return { 24 | output 25 | }; 26 | } 27 | 28 | std::vector<at::Tensor> top_pool_backward( 29 | at::Tensor input, 30 | at::Tensor grad_output 31 | ) { 32 | auto output = at::zeros_like(input); 33 | 34 | int32_t batch = input.size(0); 35 | int32_t channel = input.size(1); 36 | int32_t height = input.size(2); 37 | int32_t width = input.size(3); 38 | 39 | auto max_val = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 40 | auto max_ind = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kLong)); 41 | 42 | auto input_temp = input.select(2, height - 1); 43 | max_val.copy_(input_temp); 44 | 45 | max_ind.fill_(height - 1); 46 | 47 | auto output_temp = output.select(2, height - 1); 48 | auto grad_output_temp = grad_output.select(2, height - 1); 49 | output_temp.copy_(grad_output_temp); 50 | 51 | auto un_max_ind = max_ind.unsqueeze(2); 52 | auto gt_mask = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kByte)); 53 | auto max_temp = torch::zeros({batch, channel, width}, at::device(at::kCUDA).dtype(at::kFloat)); 54 | for (int32_t ind = 1; ind < height; ++ind) { 55 | input_temp = input.select(2, height - ind - 1); 56 | at::gt_out(gt_mask, input_temp, max_val); 57 | 58 | at::masked_select_out(max_temp, input_temp, gt_mask); 59 | max_val.masked_scatter_(gt_mask, max_temp); 60 | max_ind.masked_fill_(gt_mask, height - ind - 1); 61 | 62 | grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2); 63 | output.scatter_add_(2, un_max_ind, grad_output_temp); 64 | } 65 | 66 | return { 67 | output 68 | }; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def( 73 | "forward", &top_pool_forward, "Top Pool Forward", 74 | py::call_guard<py::gil_scoped_release>() 75 | ); 76 | m.def( 77 | "backward", &top_pool_backward, "Top Pool Backward", 78 | py::call_guard<py::gil_scoped_release>() 79 | ); 80 | } 81 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules import Module 3 | from torch.nn.parallel.scatter_gather import gather 4 | from torch.nn.parallel.replicate import replicate 5 | from torch.nn.parallel.parallel_apply import parallel_apply 6 | 7 | from .scatter_gather import scatter_kwargs 8 | 9 | class DataParallel(Module): 10 | r"""Implements data parallelism at the module level. 11 | 12 | This container parallelizes the application of the given module by 13 | splitting the input across the specified devices by chunking in the batch 14 | dimension. In the forward pass, the module is replicated on each device, 15 | and each replica handles a portion of the input. During the backwards 16 | pass, gradients from each replica are summed into the original module.
17 | 18 | The batch size should be larger than the number of GPUs used. It should 19 | also be an integer multiple of the number of GPUs so that each chunk is the 20 | same size (so that each GPU processes the same number of samples). 21 | 22 | See also: :ref:`cuda-nn-dataparallel-instead` 23 | 24 | Arbitrary positional and keyword inputs are allowed to be passed into 25 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 26 | specified (default 0). Primitive types will be broadcasted, but all 27 | other types will be a shallow copy and can be corrupted if written to in 28 | the model's forward pass. 29 | 30 | Args: 31 | module: module to be parallelized 32 | device_ids: CUDA devices (default: all devices) 33 | output_device: device location of output (default: device_ids[0]) 34 | 35 | Example:: 36 | 37 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 38 | >>> output = net(input_var) 39 | """ 40 | 41 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 42 | 43 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 44 | super(DataParallel, self).__init__() 45 | 46 | if not torch.cuda.is_available(): 47 | self.module = module 48 | self.device_ids = [] 49 | return 50 | 51 | if device_ids is None: 52 | device_ids = list(range(torch.cuda.device_count())) 53 | if output_device is None: 54 | output_device = device_ids[0] 55 | self.dim = dim 56 | self.module = module 57 | self.device_ids = device_ids 58 | self.chunk_sizes = chunk_sizes 59 | self.output_device = output_device 60 | if len(self.device_ids) == 1: 61 | self.module.cuda(device_ids[0]) 62 | 63 | def forward(self, *inputs, **kwargs): 64 | if not self.device_ids: 65 | return self.module(*inputs, **kwargs) 66 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 67 | if len(self.device_ids) == 1: 68 | return self.module(*inputs[0], **kwargs[0]) 69 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 70 | outputs = self.parallel_apply(replicas, inputs, kwargs) 71 | return self.gather(outputs, self.output_device) 72 | 73 | def replicate(self, module, device_ids): 74 | return replicate(module, device_ids) 75 | 76 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 77 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 78 | 79 | def parallel_apply(self, replicas, inputs, kwargs): 80 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 81 | 82 | def gather(self, outputs, output_device): 83 | return gather(outputs, output_device, dim=self.dim) 84 | 85 | 86 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 87 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 88 | 89 | This is the functional version of the DataParallel module. 90 | 91 | Args: 92 | module: the module to evaluate in parallel 93 | inputs: inputs to the module 94 | device_ids: GPU ids on which to replicate module 95 | output_device: GPU location of the output Use -1 to indicate the CPU. 
96 | (default: device_ids[0]) 97 | Returns: 98 | a Variable containing the result of module(input) located on 99 | output_device 100 | """ 101 | if not isinstance(inputs, tuple): 102 | inputs = (inputs,) 103 | 104 | if device_ids is None: 105 | device_ids = list(range(torch.cuda.device_count())) 106 | 107 | if output_device is None: 108 | output_device = device_ids[0] 109 | 110 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 111 | if len(device_ids) == 1: 112 | return module(*inputs[0], **module_kwargs[0]) 113 | used_device_ids = device_ids[:len(inputs)] 114 | replicas = replicate(module, used_device_ids) 115 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 116 | return gather(outputs, output_device, dim) 117 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from .utils import _tranpose_and_gather_feat 5 | 6 | def _sigmoid(x): 7 | return torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 8 | 9 | def _ae_loss(tag0, tag1, mask): 10 | num = mask.sum(dim=1, keepdim=True).float() 11 | tag0 = tag0.squeeze() 12 | tag1 = tag1.squeeze() 13 | 14 | tag_mean = (tag0 + tag1) / 2 15 | 16 | tag0 = torch.pow(tag0 - tag_mean, 2) / (num + 1e-4) 17 | tag0 = tag0[mask].sum() 18 | tag1 = torch.pow(tag1 - tag_mean, 2) / (num + 1e-4) 19 | tag1 = tag1[mask].sum() 20 | pull = tag0 + tag1 21 | 22 | mask = mask.unsqueeze(1) + mask.unsqueeze(2) 23 | mask = mask.eq(2) 24 | num = num.unsqueeze(2) 25 | num2 = (num - 1) * num 26 | dist = tag_mean.unsqueeze(1) - tag_mean.unsqueeze(2) 27 | dist = 1 - torch.abs(dist) 28 | dist = nn.functional.relu(dist, inplace=True) 29 | dist = dist - 1 / (num + 1e-4) 30 | dist = dist / (num2 + 1e-4) 31 | dist = dist[mask] 32 | push = dist.sum() 33 | return pull, push 34 | 35 | def _off_loss(off, gt_off, mask): 36 | num = mask.float().sum() 37 | mask = mask.unsqueeze(2).expand_as(gt_off) 38 | 39 | off = off[mask] 40 | gt_off = gt_off[mask] 41 | 42 | off_loss = nn.functional.smooth_l1_loss(off, gt_off, reduction="sum") 43 | off_loss = off_loss / (num + 1e-4) 44 | return off_loss 45 | 46 | def _focal_loss_mask(preds, gt, mask): 47 | pos_inds = gt.eq(1) 48 | neg_inds = gt.lt(1) 49 | 50 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 51 | 52 | pos_mask = mask[pos_inds] 53 | neg_mask = mask[neg_inds] 54 | 55 | loss = 0 56 | for pred in preds: 57 | pos_pred = pred[pos_inds] 58 | neg_pred = pred[neg_inds] 59 | 60 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) * pos_mask 61 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights * neg_mask 62 | 63 | num_pos = pos_inds.float().sum() 64 | pos_loss = pos_loss.sum() 65 | neg_loss = neg_loss.sum() 66 | 67 | if pos_pred.nelement() == 0: 68 | loss = loss - neg_loss 69 | else: 70 | loss = loss - (pos_loss + neg_loss) / num_pos 71 | return loss 72 | 73 | def _focal_loss(preds, gt): 74 | pos_inds = gt.eq(1) 75 | neg_inds = gt.lt(1) 76 | 77 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 78 | 79 | loss = 0 80 | for pred in preds: 81 | pos_pred = pred[pos_inds] 82 | neg_pred = pred[neg_inds] 83 | 84 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 85 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 86 | 87 | num_pos = pos_inds.float().sum() 88 | pos_loss = pos_loss.sum() 89 | neg_loss = neg_loss.sum() 90 | 91 | if 
pos_pred.nelement() == 0: 92 | loss = loss - neg_loss 93 | else: 94 | loss = loss - (pos_loss + neg_loss) / num_pos 95 | return loss 96 | 97 | class CornerNet_Saccade_Loss(nn.Module): 98 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss_mask): 99 | super(CornerNet_Saccade_Loss, self).__init__() 100 | 101 | self.pull_weight = pull_weight 102 | self.push_weight = push_weight 103 | self.off_weight = off_weight 104 | self.focal_loss = focal_loss 105 | self.ae_loss = _ae_loss 106 | self.off_loss = _off_loss 107 | 108 | def forward(self, outs, targets): 109 | tl_heats = outs[0] 110 | br_heats = outs[1] 111 | tl_tags = outs[2] 112 | br_tags = outs[3] 113 | tl_offs = outs[4] 114 | br_offs = outs[5] 115 | atts = outs[6] 116 | 117 | gt_tl_heat = targets[0] 118 | gt_br_heat = targets[1] 119 | gt_mask = targets[2] 120 | gt_tl_off = targets[3] 121 | gt_br_off = targets[4] 122 | gt_tl_ind = targets[5] 123 | gt_br_ind = targets[6] 124 | gt_tl_valid = targets[7] 125 | gt_br_valid = targets[8] 126 | gt_atts = targets[9] 127 | 128 | # focal loss 129 | focal_loss = 0 130 | 131 | tl_heats = [_sigmoid(t) for t in tl_heats] 132 | br_heats = [_sigmoid(b) for b in br_heats] 133 | 134 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat, gt_tl_valid) 135 | focal_loss += self.focal_loss(br_heats, gt_br_heat, gt_br_valid) 136 | 137 | atts = [[_sigmoid(a) for a in att] for att in atts] 138 | atts = [[att[ind] for att in atts] for ind in range(len(gt_atts))] 139 | 140 | att_loss = 0 141 | for att, gt_att in zip(atts, gt_atts): 142 | att_loss += _focal_loss(att, gt_att) / max(len(att), 1) 143 | 144 | # tag loss 145 | pull_loss = 0 146 | push_loss = 0 147 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 148 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 149 | for tl_tag, br_tag in zip(tl_tags, br_tags): 150 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 151 | pull_loss += pull 152 | push_loss += push 153 | pull_loss = self.pull_weight * pull_loss 154 | push_loss = self.push_weight * push_loss 155 | 156 | off_loss = 0 157 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 158 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 159 | for tl_off, br_off in zip(tl_offs, br_offs): 160 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 161 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 162 | off_loss = self.off_weight * off_loss 163 | 164 | loss = (focal_loss + att_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 165 | return loss.unsqueeze(0) 166 | 167 | class CornerNet_Loss(nn.Module): 168 | def __init__(self, pull_weight=1, push_weight=1, off_weight=1, focal_loss=_focal_loss): 169 | super(CornerNet_Loss, self).__init__() 170 | 171 | self.pull_weight = pull_weight 172 | self.push_weight = push_weight 173 | self.off_weight = off_weight 174 | self.focal_loss = focal_loss 175 | self.ae_loss = _ae_loss 176 | self.off_loss = _off_loss 177 | 178 | def forward(self, outs, targets): 179 | tl_heats = outs[0] 180 | br_heats = outs[1] 181 | tl_tags = outs[2] 182 | br_tags = outs[3] 183 | tl_offs = outs[4] 184 | br_offs = outs[5] 185 | 186 | gt_tl_heat = targets[0] 187 | gt_br_heat = targets[1] 188 | gt_mask = targets[2] 189 | gt_tl_off = targets[3] 190 | gt_br_off = targets[4] 191 | gt_tl_ind = targets[5] 192 | gt_br_ind = targets[6] 193 | 194 | # focal loss 195 | focal_loss = 0 196 | 197 | tl_heats = [_sigmoid(t) for t in tl_heats] 198 | 
br_heats = [_sigmoid(b) for b in br_heats] 199 | 200 | focal_loss += self.focal_loss(tl_heats, gt_tl_heat) 201 | focal_loss += self.focal_loss(br_heats, gt_br_heat) 202 | 203 | # tag loss 204 | pull_loss = 0 205 | push_loss = 0 206 | tl_tags = [_tranpose_and_gather_feat(tl_tag, gt_tl_ind) for tl_tag in tl_tags] 207 | br_tags = [_tranpose_and_gather_feat(br_tag, gt_br_ind) for br_tag in br_tags] 208 | for tl_tag, br_tag in zip(tl_tags, br_tags): 209 | pull, push = self.ae_loss(tl_tag, br_tag, gt_mask) 210 | pull_loss += pull 211 | push_loss += push 212 | pull_loss = self.pull_weight * pull_loss 213 | push_loss = self.push_weight * push_loss 214 | 215 | off_loss = 0 216 | tl_offs = [_tranpose_and_gather_feat(tl_off, gt_tl_ind) for tl_off in tl_offs] 217 | br_offs = [_tranpose_and_gather_feat(br_off, gt_br_ind) for br_off in br_offs] 218 | for tl_off, br_off in zip(tl_offs, br_offs): 219 | off_loss += self.off_loss(tl_off, gt_tl_off, gt_mask) 220 | off_loss += self.off_loss(br_off, gt_br_off, gt_mask) 221 | off_loss = self.off_weight * off_loss 222 | 223 | loss = (focal_loss + pull_loss + push_loss + off_loss) / max(len(tl_heats), 1) 224 | return loss.unsqueeze(0) 225 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
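        # Recurse over containers: tuples/lists/dicts are taken apart, each
        # member is scattered across target_gpus, and one container per GPU is
        # rebuilt from the pieces; any other object falls through below and is
        # repeated by reference for every GPU.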
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/networks/py_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def _gather_feat(feat, ind, mask=None): 5 | dim = feat.size(2) 6 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 7 | feat = feat.gather(1, ind) 8 | if mask is not None: 9 | mask = mask.unsqueeze(2).expand_as(feat) 10 | feat = feat[mask] 11 | feat = feat.view(-1, dim) 12 | return feat 13 | 14 | def _nms(heat, kernel=1): 15 | pad = (kernel - 1) // 2 16 | 17 | hmax = nn.functional.max_pool2d(heat, (kernel, kernel), stride=1, padding=pad) 18 | keep = (hmax == heat).float() 19 | return heat * keep 20 | 21 | def _tranpose_and_gather_feat(feat, ind): 22 | feat = feat.permute(0, 2, 3, 1).contiguous() 23 | feat = feat.view(feat.size(0), -1, feat.size(3)) 24 | feat = _gather_feat(feat, ind) 25 | return feat 26 | 27 | def _topk(scores, K=20): 28 | batch, cat, height, width = scores.size() 29 | 30 | topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K) 31 | 32 | topk_clses = (topk_inds / (height * width)).int() 33 | 34 | topk_inds = topk_inds % (height * width) 35 | topk_ys = (topk_inds / width).int().float() 36 | topk_xs = (topk_inds % width).int().float() 37 | return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs 38 | 39 | def _decode( 40 | tl_heat, br_heat, tl_tag, br_tag, tl_regr, br_regr, 41 | K=100, kernel=1, ae_threshold=1, num_dets=1000, no_border=False 42 | ): 43 | batch, cat, height, width = tl_heat.size() 44 | 45 | tl_heat = torch.sigmoid(tl_heat) 46 | br_heat = torch.sigmoid(br_heat) 47 | 48 | # perform nms on heatmaps 49 | tl_heat = _nms(tl_heat, kernel=kernel) 50 | br_heat = _nms(br_heat, kernel=kernel) 51 | 52 | tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = _topk(tl_heat, K=K) 53 | br_scores, br_inds, br_clses, br_ys, br_xs = _topk(br_heat, K=K) 54 | 55 | tl_ys = tl_ys.view(batch, K, 1).expand(batch, K, K) 56 | tl_xs = tl_xs.view(batch, K, 1).expand(batch, K, K) 57 | br_ys = br_ys.view(batch, 1, K).expand(batch, K, K) 58 | br_xs = br_xs.view(batch, 1, K).expand(batch, K, K) 59 | 60 | if no_border: 61 | tl_ys_binds = (tl_ys == 0) 62 | tl_xs_binds = (tl_xs == 0) 63 | br_ys_binds = (br_ys == height - 1) 64 | br_xs_binds = (br_xs == width - 1) 65 | 66 | if tl_regr is not None and br_regr is not None: 67 | tl_regr = _tranpose_and_gather_feat(tl_regr, tl_inds) 68 | tl_regr = tl_regr.view(batch, K, 1, 2) 69 | br_regr = _tranpose_and_gather_feat(br_regr, br_inds) 70 | br_regr = 
br_regr.view(batch, 1, K, 2) 71 | 72 | tl_xs = tl_xs + tl_regr[..., 0] 73 | tl_ys = tl_ys + tl_regr[..., 1] 74 | br_xs = br_xs + br_regr[..., 0] 75 | br_ys = br_ys + br_regr[..., 1] 76 | 77 | # all possible boxes based on top k corners (ignoring class) 78 | bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3) 79 | 80 | tl_tag = _tranpose_and_gather_feat(tl_tag, tl_inds) 81 | tl_tag = tl_tag.view(batch, K, 1) 82 | br_tag = _tranpose_and_gather_feat(br_tag, br_inds) 83 | br_tag = br_tag.view(batch, 1, K) 84 | dists = torch.abs(tl_tag - br_tag) 85 | 86 | tl_scores = tl_scores.view(batch, K, 1).expand(batch, K, K) 87 | br_scores = br_scores.view(batch, 1, K).expand(batch, K, K) 88 | scores = (tl_scores + br_scores) / 2 89 | 90 | # reject boxes based on classes 91 | tl_clses = tl_clses.view(batch, K, 1).expand(batch, K, K) 92 | br_clses = br_clses.view(batch, 1, K).expand(batch, K, K) 93 | cls_inds = (tl_clses != br_clses) 94 | 95 | # reject boxes based on distances 96 | dist_inds = (dists > ae_threshold) 97 | 98 | # reject boxes based on widths and heights 99 | width_inds = (br_xs < tl_xs) 100 | height_inds = (br_ys < tl_ys) 101 | 102 | if no_border: 103 | scores[tl_ys_binds] = -1 104 | scores[tl_xs_binds] = -1 105 | scores[br_ys_binds] = -1 106 | scores[br_xs_binds] = -1 107 | 108 | scores[cls_inds] = -1 109 | scores[dist_inds] = -1 110 | scores[width_inds] = -1 111 | scores[height_inds] = -1 112 | 113 | scores = scores.view(batch, -1) 114 | scores, inds = torch.topk(scores, num_dets) 115 | scores = scores.unsqueeze(2) 116 | 117 | bboxes = bboxes.view(batch, -1, 4) 118 | bboxes = _gather_feat(bboxes, inds) 119 | 120 | clses = tl_clses.contiguous().view(batch, -1, 1) 121 | clses = _gather_feat(clses, inds).float() 122 | 123 | tl_scores = tl_scores.contiguous().view(batch, -1, 1) 124 | tl_scores = _gather_feat(tl_scores, inds).float() 125 | br_scores = br_scores.contiguous().view(batch, -1, 1) 126 | br_scores = _gather_feat(br_scores, inds).float() 127 | 128 | detections = torch.cat([bboxes, scores, tl_scores, br_scores, clses], dim=2) 129 | return detections 130 | 131 | class upsample(nn.Module): 132 | def __init__(self, scale_factor): 133 | super(upsample, self).__init__() 134 | self.scale_factor = scale_factor 135 | 136 | def forward(self, x): 137 | return nn.functional.interpolate(x, scale_factor=self.scale_factor) 138 | 139 | class merge(nn.Module): 140 | def forward(self, x, y): 141 | return x + y 142 | 143 | class convolution(nn.Module): 144 | def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True): 145 | super(convolution, self).__init__() 146 | 147 | pad = (k - 1) // 2 148 | self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn) 149 | self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential() 150 | self.relu = nn.ReLU(inplace=True) 151 | 152 | def forward(self, x): 153 | conv = self.conv(x) 154 | bn = self.bn(conv) 155 | relu = self.relu(bn) 156 | return relu 157 | 158 | class residual(nn.Module): 159 | def __init__(self, inp_dim, out_dim, k=3, stride=1): 160 | super(residual, self).__init__() 161 | p = (k - 1) // 2 162 | 163 | self.conv1 = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(p, p), stride=(stride, stride), bias=False) 164 | self.bn1 = nn.BatchNorm2d(out_dim) 165 | self.relu1 = nn.ReLU(inplace=True) 166 | 167 | self.conv2 = nn.Conv2d(out_dim, out_dim, (k, k), padding=(p, p), bias=False) 168 | self.bn2 = nn.BatchNorm2d(out_dim) 169 | 170 | self.skip = nn.Sequential( 171 | nn.Conv2d(inp_dim, out_dim, (1, 
1), stride=(stride, stride), bias=False), 172 | nn.BatchNorm2d(out_dim) 173 | ) if stride != 1 or inp_dim != out_dim else nn.Sequential() 174 | self.relu = nn.ReLU(inplace=True) 175 | 176 | def forward(self, x): 177 | conv1 = self.conv1(x) 178 | bn1 = self.bn1(conv1) 179 | relu1 = self.relu1(bn1) 180 | 181 | conv2 = self.conv2(relu1) 182 | bn2 = self.bn2(conv2) 183 | 184 | skip = self.skip(x) 185 | return self.relu(bn2 + skip) 186 | 187 | class corner_pool(nn.Module): 188 | def __init__(self, dim, pool1, pool2): 189 | super(corner_pool, self).__init__() 190 | self._init_layers(dim, pool1, pool2) 191 | 192 | def _init_layers(self, dim, pool1, pool2): 193 | self.p1_conv1 = convolution(3, dim, 128) 194 | self.p2_conv1 = convolution(3, dim, 128) 195 | 196 | self.p_conv1 = nn.Conv2d(128, dim, (3, 3), padding=(1, 1), bias=False) 197 | self.p_bn1 = nn.BatchNorm2d(dim) 198 | 199 | self.conv1 = nn.Conv2d(dim, dim, (1, 1), bias=False) 200 | self.bn1 = nn.BatchNorm2d(dim) 201 | self.relu1 = nn.ReLU(inplace=True) 202 | 203 | self.conv2 = convolution(3, dim, dim) 204 | 205 | self.pool1 = pool1() 206 | self.pool2 = pool2() 207 | 208 | def forward(self, x): 209 | # pool 1 210 | p1_conv1 = self.p1_conv1(x) 211 | pool1 = self.pool1(p1_conv1) 212 | 213 | # pool 2 214 | p2_conv1 = self.p2_conv1(x) 215 | pool2 = self.pool2(p2_conv1) 216 | 217 | # pool 1 + pool 2 218 | p_conv1 = self.p_conv1(pool1 + pool2) 219 | p_bn1 = self.p_bn1(p_conv1) 220 | 221 | conv1 = self.conv1(x) 222 | bn1 = self.bn1(conv1) 223 | relu1 = self.relu1(p_bn1 + bn1) 224 | 225 | conv2 = self.conv2(relu1) 226 | return conv2 227 | -------------------------------------------------------------------------------- /src/lib/models/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 
17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /src/lib/models/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | def _sigmoid(x): 9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 10 | return y 11 | 12 | def _gather_feat(feat, ind, mask=None): 13 | dim = feat.size(2) 14 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim) 15 | feat = feat.gather(1, ind) 16 | if mask is not None: 17 | mask = mask.unsqueeze(2).expand_as(feat) 18 | feat = feat[mask] 19 | feat = feat.view(-1, dim) 20 | return feat 21 | 22 | def _tranpose_and_gather_feat(feat, ind): 23 | feat = feat.permute(0, 2, 3, 1).contiguous() 24 | feat = feat.view(feat.size(0), -1, feat.size(3)) 25 | feat = _gather_feat(feat, ind) 26 | return feat 27 | 28 | def flip_tensor(x): 29 | return torch.flip(x, [3]) 30 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy() 31 | # return torch.from_numpy(tmp).to(x.device) 32 | 33 | def flip_lr(x, flip_idx): 34 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 35 | shape = tmp.shape 36 | for e in flip_idx: 37 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \ 38 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 39 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) 40 | 41 | def flip_lr_off(x, flip_idx): 42 | tmp = x.detach().cpu().numpy()[..., ::-1].copy() 43 | shape = tmp.shape 44 | tmp = tmp.reshape(tmp.shape[0], 17, 2, 45 | tmp.shape[2], tmp.shape[3]) 46 | tmp[:, :, 0, :, :] *= -1 47 | for e in flip_idx: 48 | tmp[:, e[0], ...], tmp[:, e[1], ...] 
= \ 49 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy() 50 | return torch.from_numpy(tmp.reshape(shape)).to(x.device) -------------------------------------------------------------------------------- /src/lib/trains/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from models.data_parallel import DataParallel 9 | from utils.utils import AverageMeter 10 | 11 | 12 | class ModelWithLoss(torch.nn.Module): 13 | def __init__(self, model, loss): 14 | super(ModelWithLoss, self).__init__() 15 | self.model = model 16 | self.loss = loss 17 | 18 | def forward(self, batch): 19 | outputs = self.model(batch['input']) 20 | loss, loss_stats = self.loss(outputs, batch) 21 | return outputs[-1], loss, loss_stats 22 | 23 | 24 | class BaseTrainer(object): 25 | def __init__( 26 | self, opt, model, optimizer=None): 27 | self.opt = opt 28 | self.optimizer = optimizer 29 | self.loss_stats, self.loss = self._get_losses(opt) 30 | self.model_with_loss = ModelWithLoss(model, self.loss) 31 | 32 | def set_device(self, gpus, chunk_sizes, device): 33 | if len(gpus) > 1: 34 | self.model_with_loss = DataParallel( 35 | self.model_with_loss, device_ids=gpus, 36 | chunk_sizes=chunk_sizes).to(device) 37 | else: 38 | self.model_with_loss = self.model_with_loss.to(device) 39 | 40 | for state in self.optimizer.state.values(): 41 | for k, v in state.items(): 42 | if isinstance(v, torch.Tensor): 43 | state[k] = v.to(device=device, non_blocking=True) 44 | 45 | def run_epoch(self, phase, epoch, data_loader): 46 | model_with_loss = self.model_with_loss 47 | if phase == 'train': 48 | model_with_loss.train() 49 | else: 50 | if len(self.opt.gpus) > 1: 51 | model_with_loss = self.model_with_loss.module 52 | model_with_loss.eval() 53 | torch.cuda.empty_cache() 54 | 55 | opt = self.opt 56 | results = {} 57 | data_time, batch_time = AverageMeter(), AverageMeter() 58 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 59 | num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters 60 | bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 61 | end = time.time() 62 | for iter_id, batch in enumerate(data_loader): 63 | if iter_id >= num_iters: 64 | break 65 | data_time.update(time.time() - end) 66 | 67 | for k in batch: 68 | if k != 'meta': 69 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 70 | output, loss, loss_stats = model_with_loss(batch) 71 | loss = loss.mean() 72 | if phase == 'train': 73 | self.optimizer.zero_grad() 74 | loss.backward() 75 | self.optimizer.step() 76 | batch_time.update(time.time() - end) 77 | end = time.time() 78 | 79 | Bar.suffix = '{phase}: [{0}][{1}/{2}]|Tot: {total:} |ETA: {eta:} '.format( 80 | epoch, iter_id, num_iters, phase=phase, 81 | total=bar.elapsed_td, eta=bar.eta_td) 82 | for l in avg_loss_stats: 83 | avg_loss_stats[l].update( 84 | loss_stats[l].mean().item(), batch['input'].size(0)) 85 | Bar.suffix = Bar.suffix + '|{} {:.4f} '.format(l, avg_loss_stats[l].avg) 86 | if not opt.hide_data_time: 87 | Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \ 88 | '|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time) 89 | if opt.print_iter > 0: 90 | if iter_id % opt.print_iter == 0: 91 | print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix)) 92 | else: 93 | bar.next() 94 | 95 | if opt.debug > 0: 96 |
self.debug(batch, output, iter_id) 97 | 98 | if opt.test: 99 | self.save_result(output, batch, results) 100 | del output, loss, loss_stats 101 | 102 | bar.finish() 103 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 104 | ret['time'] = bar.elapsed_td.total_seconds() / 60. 105 | return ret, results 106 | 107 | def debug(self, batch, output, iter_id): 108 | raise NotImplementedError 109 | 110 | def save_result(self, output, batch, results): 111 | raise NotImplementedError 112 | 113 | def _get_losses(self, opt): 114 | raise NotImplementedError 115 | 116 | def val(self, epoch, data_loader): 117 | return self.run_epoch('val', epoch, data_loader) 118 | 119 | def train(self, epoch, data_loader): 120 | return self.run_epoch('train', epoch, data_loader) 121 | -------------------------------------------------------------------------------- /src/lib/trains/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from models.losses import FocalLoss 9 | from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 10 | from models.decode import ctdet_decode 11 | from models.utils import _sigmoid 12 | from utils.debugger import Debugger 13 | from utils.post_process import ctdet_post_process 14 | from utils.oracle_utils import gen_oracle_map 15 | from .base_trainer import BaseTrainer 16 | 17 | 18 | class CtdetLoss(torch.nn.Module): 19 | def __init__(self, opt): 20 | super(CtdetLoss, self).__init__() 21 | self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss() 22 | self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \ 23 | RegLoss() if opt.reg_loss == 'sl1' else None 24 | self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \ 25 | NormRegL1Loss() if opt.norm_wh else \ 26 | RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg 27 | self.opt = opt 28 | 29 | def forward(self, outputs, batch): 30 | opt = self.opt 31 | hm_act_loss, wh_act_loss = 0, 0 32 | for s in range(opt.num_stacks): 33 | output = outputs[s] 34 | if not opt.mse_loss: 35 | output['hm_act_f'] = _sigmoid(output['hm_act_f']) 36 | hm_act_loss += self.crit(output['hm_act_f'], batch['hm_act']) / opt.num_stacks 37 | wh_act_loss += self.crit_reg(output['wh_act'], batch['reg_act_mask'], 38 | batch['ind_act'], batch['wh_act']) / opt.num_stacks 39 | loss = opt.hm_act_weight * hm_act_loss + opt.wh_weight * wh_act_loss 40 | loss_stats = {'loss': loss, 'hm_act_loss': hm_act_loss, 'wh_act_loss': wh_act_loss} 41 | return loss, loss_stats 42 | 43 | 44 | class CtdetTrainer(BaseTrainer): 45 | def __init__(self, opt, model, optimizer=None): 46 | super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer) 47 | 48 | def _get_losses(self, opt): 49 | loss_states = ['loss', 'hm_act_loss', 'wh_act_loss'] 50 | loss = CtdetLoss(opt) 51 | return loss_states, loss 52 | 53 | def debug(self, batch, output, iter_id): 54 | opt = self.opt 55 | reg = output['reg'] if opt.reg_offset else None 56 | dets = ctdet_decode( 57 | output['hm'], output['wh'], reg=reg, 58 | cat_spec_wh=opt.cat_spec_wh, K=opt.K) 59 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 60 | dets[:, :, :4] *= opt.down_ratio 61 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 62 | dets_gt[:, :, :4] *= opt.down_ratio 63 | for i in range(1): 64 | debugger = Debugger( 65 | dataset=opt.dataset, 
ipynb=(opt.debug==3), theme=opt.debugger_theme) 66 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 67 | img = np.clip((( 68 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 69 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 70 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 71 | debugger.add_blend_img(img, pred, 'pred_hm') 72 | debugger.add_blend_img(img, gt, 'gt_hm') 73 | debugger.add_img(img, img_id='out_pred') 74 | for k in range(len(dets[i])): 75 | if dets[i, k, 4] > opt.center_thresh: 76 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 77 | dets[i, k, 4], img_id='out_pred') 78 | 79 | debugger.add_img(img, img_id='out_gt') 80 | for k in range(len(dets_gt[i])): 81 | if dets_gt[i, k, 4] > opt.center_thresh: 82 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 83 | dets_gt[i, k, 4], img_id='out_gt') 84 | 85 | if opt.debug == 4: 86 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 87 | else: 88 | debugger.show_all_imgs(pause=True) 89 | 90 | def save_result(self, output, batch, results): 91 | reg = output['reg'] if self.opt.reg_offset else None 92 | dets = ctdet_decode( 93 | output['hm'], output['wh'], reg=reg, 94 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 95 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 96 | dets_out = ctdet_post_process( 97 | dets.copy(), batch['meta']['c'].cpu().numpy(), 98 | batch['meta']['s'].cpu().numpy(), 99 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 100 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] 101 | -------------------------------------------------------------------------------- /src/lib/trains/train_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from .ctdet import CtdetTrainer 6 | 7 | train_factory = { 8 | 'ctdet': CtdetTrainer} 9 | 10 | -------------------------------------------------------------------------------- /src/lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vaesl/IP-Net/1c329cc17b245ebb13fb5ea411b97f02e32320fc/src/lib/utils/__init__.py -------------------------------------------------------------------------------- /src/lib/utils/ddd_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | 8 | def compute_box_3d(dim, location, rotation_y): 9 | # dim: 3 10 | # location: 3 11 | # rotation_y: 1 12 | # return: 8 x 3 13 | c, s = np.cos(rotation_y), np.sin(rotation_y) 14 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 15 | l, w, h = dim[2], dim[1], dim[0] 16 | x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2] 17 | y_corners = [0,0,0,0,-h,-h,-h,-h] 18 | z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2] 19 | 20 | corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32) 21 | corners_3d = np.dot(R, corners) 22 | corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1) 23 | return corners_3d.transpose(1, 0) 24 | 25 | def project_to_image(pts_3d, P): 26 | # pts_3d: n x 3 27 | # P: 3 x 4 28 | # return: n x 2 29 | pts_3d_homo = np.concatenate( 30 | 
[pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1) 31 | pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0) 32 | pts_2d = pts_2d[:, :2] / pts_2d[:, 2:] 33 | # import pdb; pdb.set_trace() 34 | return pts_2d 35 | 36 | def compute_orientation_3d(dim, location, rotation_y): 37 | # dim: 3 38 | # location: 3 39 | # rotation_y: 1 40 | # return: 2 x 3 41 | c, s = np.cos(rotation_y), np.sin(rotation_y) 42 | R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32) 43 | orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32) 44 | orientation_3d = np.dot(R, orientation_3d) 45 | orientation_3d = orientation_3d + \ 46 | np.array(location, dtype=np.float32).reshape(3, 1) 47 | return orientation_3d.transpose(1, 0) 48 | 49 | def draw_box_3d(image, corners, c=(0, 0, 255)): 50 | face_idx = [[0,1,5,4], 51 | [1,2,6, 5], 52 | [2,3,7,6], 53 | [3,0,4,7]] 54 | for ind_f in range(3, -1, -1): 55 | f = face_idx[ind_f] 56 | for j in range(4): 57 | cv2.line(image, (corners[f[j], 0], corners[f[j], 1]), 58 | (corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA) 59 | if ind_f == 0: 60 | cv2.line(image, (corners[f[0], 0], corners[f[0], 1]), 61 | (corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA) 62 | cv2.line(image, (corners[f[1], 0], corners[f[1], 1]), 63 | (corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA) 64 | return image 65 | 66 | def unproject_2d_to_3d(pt_2d, depth, P): 67 | # pts_2d: 2 68 | # depth: 1 69 | # P: 3 x 4 70 | # return: 3 71 | z = depth - P[2, 3] 72 | x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0] 73 | y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1] 74 | pt_3d = np.array([x, y, z], dtype=np.float32) 75 | return pt_3d 76 | 77 | def alpha2rot_y(alpha, x, cx, fx): 78 | """ 79 | Get rotation_y by alpha + theta - 180 80 | alpha : Observation angle of object, ranging [-pi..pi] 81 | x : Object center x to the camera center (x-W/2), in pixels 82 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 83 | """ 84 | rot_y = alpha + np.arctan2(x - cx, fx) 85 | if rot_y > np.pi: 86 | rot_y -= 2 * np.pi 87 | if rot_y < -np.pi: 88 | rot_y += 2 * np.pi 89 | return rot_y 90 | 91 | def rot_y2alpha(rot_y, x, cx, fx): 92 | """ 93 | Get rotation_y by alpha + theta - 180 94 | alpha : Observation angle of object, ranging [-pi..pi] 95 | x : Object center x to the camera center (x-W/2), in pixels 96 | rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi] 97 | """ 98 | alpha = rot_y - np.arctan2(x - cx, fx) 99 | if alpha > np.pi: 100 | alpha -= 2 * np.pi 101 | if alpha < -np.pi: 102 | alpha += 2 * np.pi 103 | return alpha 104 | 105 | 106 | def ddd2locrot(center, alpha, dim, depth, calib): 107 | # single image 108 | locations = unproject_2d_to_3d(center, depth, calib) 109 | locations[1] += dim[0] / 2 110 | rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0]) 111 | return locations, rotation_y 112 | 113 | def project_3d_bbox(location, dim, rotation_y, calib): 114 | box_3d = compute_box_3d(dim, location, rotation_y) 115 | box_2d = project_to_image(box_3d, calib) 116 | return box_2d 117 | 118 | 119 | if __name__ == '__main__': 120 | calib = np.array( 121 | [[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01], 122 | [0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01], 123 | [0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]], 124 | dtype=np.float32) 125 | alpha = -0.20 
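    # Round-trip check with a sample camera matrix and 2D box: alpha2rot_y
    # should recover (approximately) the rotation_y printed next to it.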
126 | tl = np.array([712.40, 143.00], dtype=np.float32) 127 | br = np.array([810.73, 307.92], dtype=np.float32) 128 | ct = (tl + br) / 2 129 | rotation_y = 0.01 130 | print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0])) 131 | print('rotation_y', rotation_y) -------------------------------------------------------------------------------- /src/lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | 17 | def flip(img): 18 | return img[:, :, ::-1].copy() 19 | 20 | 21 | def transform_preds(coords, center, scale, output_size): 22 | target_coords = np.zeros(coords.shape) 23 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 24 | for p in range(coords.shape[0]): 25 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 26 | return target_coords 27 | 28 | 29 | def get_affine_transform(center, 30 | scale, 31 | rot, 32 | output_size, 33 | shift=np.array([0, 0], dtype=np.float32), 34 | inv=0): 35 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 36 | scale = np.array([scale, scale], dtype=np.float32) 37 | 38 | scale_tmp = scale 39 | src_w = scale_tmp[0] 40 | dst_w = output_size[0] 41 | dst_h = output_size[1] 42 | 43 | rot_rad = np.pi * rot / 180 44 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 45 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 46 | 47 | src = np.zeros((3, 2), dtype=np.float32) 48 | dst = np.zeros((3, 2), dtype=np.float32) 49 | src[0, :] = center + scale_tmp * shift 50 | src[1, :] = center + src_dir + scale_tmp * shift 51 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 52 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 53 | 54 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 55 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 56 | 57 | if inv: 58 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 59 | else: 60 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 61 | 62 | return trans 63 | 64 | 65 | def affine_transform(pt, t): 66 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 67 | new_pt = np.dot(t, new_pt) 68 | return new_pt[:2] 69 | 70 | 71 | def get_3rd_point(a, b): 72 | direct = a - b 73 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 74 | 75 | 76 | def get_dir(src_point, rot_rad): 77 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 78 | 79 | src_result = [0, 0] 80 | src_result[0] = src_point[0] * cs - src_point[1] * sn 81 | src_result[1] = src_point[0] * sn + src_point[1] * cs 82 | 83 | return src_result 84 | 85 | 86 | def crop(img, center, scale, output_size, rot=0): 87 | trans = get_affine_transform(center, scale, rot, output_size) 88 | 89 | dst_img = cv2.warpAffine(img, 90 | trans, 91 | (int(output_size[0]), int(output_size[1])), 92 | flags=cv2.INTER_LINEAR) 93 | 94 | return dst_img 95 | 96 | 97 | def gaussian_radius(det_size, min_overlap=0.7): 98 | height, width = det_size 99 | 100 | a1 = 1 101 | b1 = (height + width) 102 | c1 = width * height * (1 - min_overlap) / (1 + 
min_overlap) 103 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 104 | r1 = (b1 + sq1) / 2 105 | 106 | a2 = 4 107 | b2 = 2 * (height + width) 108 | c2 = (1 - min_overlap) * width * height 109 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 110 | r2 = (b2 + sq2) / 2 111 | 112 | a3 = 4 * min_overlap 113 | b3 = -2 * min_overlap * (height + width) 114 | c3 = (min_overlap - 1) * width * height 115 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 116 | r3 = (b3 + sq3) / 2 117 | return min(r1, r2, r3) 118 | 119 | 120 | def gaussian2D(shape, sigma=1): 121 | m, n = [(ss - 1.) / 2. for ss in shape] 122 | y, x = np.ogrid[-m:m+1,-n:n+1] 123 | 124 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 125 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 126 | return h 127 | 128 | 129 | def draw_umich_gaussian(heatmap, center, radius, k=1): 130 | diameter = 2 * radius + 1 131 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 132 | 133 | x, y = int(center[0]), int(center[1]) 134 | 135 | height, width = heatmap.shape[0:2] 136 | 137 | left, right = min(x, radius), min(width - x, radius + 1) 138 | top, bottom = min(y, radius), min(height - y, radius + 1) 139 | 140 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 141 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 142 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 143 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 144 | return heatmap 145 | 146 | 147 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 148 | diameter = 2 * radius + 1 149 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 150 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 151 | dim = value.shape[0] 152 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 153 | if is_offset and dim == 2: 154 | delta = np.arange(diameter*2+1) - radius 155 | reg[0] = reg[0] - delta.reshape(1, -1) 156 | reg[1] = reg[1] - delta.reshape(-1, 1) 157 | 158 | x, y = int(center[0]), int(center[1]) 159 | 160 | height, width = heatmap.shape[0:2] 161 | 162 | left, right = min(x, radius), min(width - x, radius + 1) 163 | top, bottom = min(y, radius), min(height - y, radius + 1) 164 | 165 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 166 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 167 | masked_gaussian = gaussian[radius - top:radius + bottom, 168 | radius - left:radius + right] 169 | masked_reg = reg[:, radius - top:radius + bottom, 170 | radius - left:radius + right] 171 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 172 | idx = (masked_gaussian >= masked_heatmap).reshape( 173 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 174 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 175 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 176 | return regmap 177 | 178 | 179 | def draw_msra_gaussian(heatmap, center, sigma): 180 | tmp_size = sigma * 3 181 | mu_x = int(center[0] + 0.5) 182 | mu_y = int(center[1] + 0.5) 183 | w, h = heatmap.shape[0], heatmap.shape[1] 184 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 185 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 186 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 187 | return heatmap 188 | size = 2 * tmp_size + 1 189 | x = np.arange(0, size, 1, np.float32) 190 | y = x[:, np.newaxis] 191 | x0 = y0 = size // 2 192 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / 
(2 * sigma ** 2)) 193 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 194 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 195 | img_x = max(0, ul[0]), min(br[0], h) 196 | img_y = max(0, ul[1]), min(br[1], w) 197 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 198 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 199 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 200 | return heatmap 201 | 202 | 203 | def grayscale(image): 204 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 205 | 206 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 207 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 208 | image += np.dot(eigvec, eigval * alpha) 209 | 210 | 211 | def blend_(alpha, image1, image2): 212 | image1 *= alpha 213 | image2 *= (1 - alpha) 214 | image1 += image2 215 | 216 | 217 | def saturation_(data_rng, image, gs, gs_mean, var): 218 | alpha = 1. + data_rng.uniform(low=-var, high=var) 219 | blend_(alpha, image, gs[:, :, None]) 220 | 221 | 222 | def brightness_(data_rng, image, gs, gs_mean, var): 223 | alpha = 1. + data_rng.uniform(low=-var, high=var) 224 | image *= alpha 225 | 226 | 227 | def contrast_(data_rng, image, gs, gs_mean, var): 228 | alpha = 1. + data_rng.uniform(low=-var, high=var) 229 | blend_(alpha, image, gs_mean) 230 | 231 | 232 | def color_aug(data_rng, image, eig_val, eig_vec): 233 | functions = [brightness_, contrast_, saturation_] 234 | random.shuffle(functions) 235 | 236 | gs = grayscale(image) 237 | gs_mean = gs.mean() 238 | for f in functions: 239 | f(data_rng, image, gs, gs_mean, 0.4) 240 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 241 | -------------------------------------------------------------------------------- /src/lib/utils/oracle_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import numba 7 | 8 | @numba.jit(nopython=True, nogil=True) 9 | def gen_oracle_map(feat, ind, w, h): 10 | # feat: B x maxN x featDim 11 | # ind: B x maxN 12 | batch_size = feat.shape[0] 13 | max_objs = feat.shape[1] 14 | feat_dim = feat.shape[2] 15 | out = np.zeros((batch_size, feat_dim, h, w), dtype=np.float32) 16 | vis = np.zeros((batch_size, h, w), dtype=np.uint8) 17 | ds = [(0, 1), (0, -1), (1, 0), (-1, 0)] 18 | for i in range(batch_size): 19 | queue_ind = np.zeros((h*w*2, 2), dtype=np.int32) 20 | queue_feat = np.zeros((h*w*2, feat_dim), dtype=np.float32) 21 | head, tail = 0, 0 22 | for j in range(max_objs): 23 | if ind[i][j] > 0: 24 | x, y = ind[i][j] % w, ind[i][j] // w 25 | out[i, :, y, x] = feat[i][j] 26 | vis[i, y, x] = 1 27 | queue_ind[tail] = x, y 28 | queue_feat[tail] = feat[i][j] 29 | tail += 1 30 | while tail - head > 0: 31 | x, y = queue_ind[head] 32 | f = queue_feat[head] 33 | head += 1 34 | for (dx, dy) in ds: 35 | xx, yy = x + dx, y + dy 36 | if xx >= 0 and yy >= 0 and xx < w and yy < h and vis[i, yy, xx] < 1: 37 | out[i, :, yy, xx] = f 38 | vis[i, yy, xx] = 1 39 | queue_ind[tail] = xx, yy 40 | queue_feat[tail] = f 41 | tail += 1 42 | return out -------------------------------------------------------------------------------- /src/lib/utils/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | 8 | 9 | def get_pred_depth(depth): 10 | 
return depth 11 | 12 | 13 | def get_alpha(rot): 14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 16 | # return rot[:, 0] 17 | idx = rot[:, 1] > rot[:, 5] 18 | alpha1 = np.arctan(rot[:, 2] / rot[:, 3]) + (-0.5 * np.pi) 19 | alpha2 = np.arctan(rot[:, 6] / rot[:, 7]) + ( 0.5 * np.pi) 20 | return alpha1 * idx + alpha2 * (1 - idx) 21 | 22 | 23 | def ctdet_post_process(dets_act, c, s, h, w, num_obj_classes, num_act_classes): 24 | ret_act = [] 25 | for i in range(dets_act.shape[0]): 26 | top_preds_act = {} 27 | dets_act[i, :, :2] = transform_preds( 28 | dets_act[i, :, 0:2], c[i], s[i], (w, h)) 29 | 30 | dets_act[i, :, 2:4] = transform_preds( 31 | dets_act[i, :, 2:4], c[i], s[i], (w, h)) 32 | 33 | dets_act[i, :, 4:6] = transform_preds( 34 | dets_act[i, :, 4:6], c[i], s[i], (w, h)) 35 | 36 | # print(dets_act[0]) 37 | 38 | classes_act = dets_act[i, :, -1] 39 | 40 | for j in range(num_act_classes): 41 | inds = (classes_act == j) 42 | top_preds_act[j + 1] = np.concatenate([ 43 | dets_act[i, inds, :6].astype(np.float32), 44 | dets_act[i, inds, 6:7].astype(np.float32)], axis=1).tolist() 45 | 46 | ret_act.append(top_preds_act) 47 | 48 | return ret_act 49 | -------------------------------------------------------------------------------- /src/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /src/test_HOI.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import pickle 7 | import json 8 | import numpy as np 9 | import _init_paths 10 | 11 | from opts import opts 12 | from timer import Timer 13 | from logger import Logger 14 | from vsrl_eval import VCOCOeval 15 | from apply_prior import apply_prior 16 | from datasets.dataset_factory import dataset_factory 17 | from detectors.detector_factory import detector_factory 18 | 19 | 20 | def getSigmoid(sigmoid_coeff, x): 21 | a, b, c, d = sigmoid_coeff 22 | e = 2.718281828459 23 | return a / (1 + e**(b - c * x)) + d 24 | 25 | 26 | def dis(A, B): 27 | distance = np.sqrt(np.sum(np.square(A - B))) 28 | return distance 29 | 30 | 31 | def iou(box1, box2): 32 | area1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) 33 | area2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) 34 | inter = max(min(box1[2], box2[2]) - max(box1[0], box2[0]) + 1, 0) * \ 35 | max(min(box1[3], box2[3]) - max(box1[1], box2[1]) + 1, 0) 36 | iou = 1.0 * inter / (area1 + area2 - inter) 37 | return iou 38 | 39 | 40 | def test(opt, Test_RCNN, prior_mask, Action_dic_inv, output_file, human_thres, object_thres, action_thres, detection): 41 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str 42 | 43 | Dataset = dataset_factory[opt.dataset] 44 | opt = 
opts().update_dataset_info_and_set_heads(opt, Dataset) 45 | print(opt) 46 | Logger(opt) 47 | Detector = detector_factory[opt.task] 48 | detector = Detector(opt) 49 | dataset = Dataset(opt, 'test') 50 | num_iters = len(dataset) 51 | 52 | count = 0 53 | wo_object_list = [4, 18, 23, 28] 54 | total_list = [i for i in range(1, 30)] 55 | sigmoid_coeff = (6, 6, 7, 0) 56 | h_dis_thresh = 10 57 | ho_dis_thresh = 80 58 | 59 | _t = {'im_detect': Timer(), 'misc': Timer()} 60 | 61 | for ind in range(num_iters): 62 | _t['im_detect'].tic() 63 | 64 | img_id = dataset.images[ind] 65 | img_info = dataset.coco.loadImgs(ids=[img_id])[0] 66 | img_path = os.path.join('coco/images/trainval2017/', img_info['file_name']) 67 | 68 | ret = detector.run(img_path) 69 | 70 | for H_ins in Test_RCNN[img_id]: 71 | if (np.max(H_ins[5]) > human_thres) and (H_ins[1] == 'Human'): # This is a valid human 72 | h_box = H_ins[2] 73 | h_c_x, h_c_y = (h_box[0] + h_box[2]) / 2, (h_box[1] + h_box[3]) / 2 74 | h_center = np.array([h_c_x, h_c_y]) # obtain the human center 75 | 76 | # Predict action without corresponding objects 77 | prediction_H = np.zeros(29) 78 | for i in wo_object_list: 79 | if len(ret[0][i]) != 0: 80 | for a in ret[0][i]: 81 | prediction_H[i-1] = a[6] if a[6] > action_thres and dis(a[0:2], h_center) < h_dis_thresh else 0 82 | 83 | # save image information 84 | dic = {} 85 | dic['image_id'] = img_id 86 | dic['person_box'] = H_ins[2] 87 | 88 | h_score = getSigmoid(sigmoid_coeff, H_ins[5]) 89 | 90 | # Predict actions between human and objects 91 | Score_obj = np.empty((0, 4 + 29), dtype=np.float32) 92 | 93 | for O_ins in Test_RCNN[img_id]: 94 | if (np.max(O_ins[5]) > object_thres) and (O_ins[1] == 'Object'): # This is a valid object 95 | o_box = O_ins[2] 96 | prediction_HO = np.zeros(29) 97 | o_score = getSigmoid(sigmoid_coeff, O_ins[5]) 98 | o_c_x, o_c_y = (o_box[0] + o_box[2]) / 2, (o_box[1] + o_box[3]) / 2 99 | 100 | for j in total_list: 101 | if j not in wo_object_list: 102 | if len(ret[0][j]) != 0: 103 | for a in ret[0][j]: 104 | iou_ao = iou(a[2:6], np.array(O_ins[2])) 105 | iou_ah = iou(a[2:6], np.array(H_ins[2])) 106 | if a[6] > action_thres and iou_ao > 0 and iou_ah > 0: 107 | 108 | ref_box = np.array([min(h_c_x, o_c_x), min(h_c_y, o_c_y), 109 | min(h_c_x, o_c_x), max(h_c_y, o_c_y), 110 | max(h_c_x, o_c_x), min(h_c_y, o_c_y), 111 | max(h_c_x, o_c_x), max(h_c_y, o_c_y)]) 112 | 113 | inter_box = np.array([a[2], a[3], a[2], a[5], a[4], a[3], a[4], a[5]]) 114 | 115 | dist_tl = dis(ref_box[0:2], inter_box[0:2]) 116 | dist_tr = dis(ref_box[2:4], inter_box[2:4]) 117 | dist_bl = dis(ref_box[4:6], inter_box[4:6]) 118 | dist_br = dis(ref_box[6:8], inter_box[6:8]) 119 | 120 | if dist_tl < ho_dis_thresh and dist_tr < ho_dis_thresh \ 121 | and dist_bl < ho_dis_thresh and dist_br < ho_dis_thresh: 122 | prediction_HO[j-1] = a[6] 123 | 124 | prediction_HO = apply_prior(O_ins, prediction_HO) 125 | prediction_HO = prediction_HO * prior_mask[:, O_ins[4]].reshape(1, 29) 126 | This_Score_obj = np.concatenate((O_ins[2].reshape(1, 4), prediction_HO * np.max(o_score)), axis=1) 127 | Score_obj = np.concatenate((Score_obj, This_Score_obj), axis=0) 128 | 129 | # There is only a single human detected in this image. I just ignore it. Might be better to add Nan as object box. 
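# Each row of Score_obj is [x1, y1, x2, y2, score_act_1, ..., score_act_29]:
# the object box followed by its 29 per-action scores, already weighted by
# the object confidence and the prior mask. np.argmax(Score_obj, 0)[4:] below
# then picks, for each action, the row of the object box scoring highest on it.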
130 | if Score_obj.shape[0] == 0: 131 | continue 132 | 133 | # Find the object box associated with the highest action score 134 | max_idx = np.argmax(Score_obj, 0)[4:] 135 | 136 | # agent mAP 137 | for i in range(29): 138 | # ''' 139 | # walk, smile, run, stand 140 | if (i == 3) or (i == 17) or (i == 22) or (i == 27): 141 | agent_name = Action_dic_inv[i] + '_agent' 142 | dic[agent_name] = np.max(h_score) * prediction_H[i] 143 | continue 144 | 145 | # cut 146 | if i == 2: 147 | agent_name = 'cut_agent' 148 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[2]][4 + 2], Score_obj[max_idx[4]][4 + 4]) 149 | continue 150 | if i == 4: 151 | continue 152 | 153 | # eat 154 | if i == 9: 155 | agent_name = 'eat_agent' 156 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[9]][4 + 9], Score_obj[max_idx[16]][4 + 16]) 157 | continue 158 | if i == 16: 159 | continue 160 | 161 | # hit 162 | if i == 19: 163 | agent_name = 'hit_agent' 164 | dic[agent_name] = np.max(h_score) * max(Score_obj[max_idx[19]][4 + 19], Score_obj[max_idx[20]][4 + 20]) 165 | continue 166 | if i == 20: 167 | continue 168 | 169 | # These 2 classes need to be saved manually because there is a '_' in the action name 170 | if i == 6: 171 | agent_name = 'talk_on_phone_agent' 172 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 173 | continue 174 | 175 | if i == 8: 176 | agent_name = 'work_on_computer_agent' 177 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 178 | continue 179 | 180 | # all the rest 181 | agent_name = Action_dic_inv[i].split("_")[0] + '_agent' 182 | dic[agent_name] = np.max(h_score) * Score_obj[max_idx[i]][4 + i] 183 | # ''' 184 | 185 | # role mAP 186 | for i in range(29): 187 | # walk, smile, run, stand. Won't contribute to role mAP 188 | if (i == 3) or (i == 17) or (i == 22) or (i == 27): 189 | dic[Action_dic_inv[i]] = np.append(np.full(4, np.nan).reshape(1, 4), 190 | np.max(h_score) * prediction_H[i]) 191 | continue 192 | 193 | # Impossible to perform this action 194 | if H_ins[4] * Score_obj[max_idx[i]][4 + i] == 0: 195 | dic[Action_dic_inv[i]] = np.append(np.full(4, np.nan).reshape(1, 4), 196 | np.max(h_score) * Score_obj[max_idx[i]][4 + i]) 197 | 198 | # Action with >0 score 199 | else: 200 | dic[Action_dic_inv[i]] = np.append(Score_obj[max_idx[i]][:4], 201 | np.max(h_score) * Score_obj[max_idx[i]][4 + i]) 202 | 203 | detection.append(dic) 204 | 205 | _t['im_detect'].toc() 206 | 207 | print('im_detect: {:d}/{:d} {:.3f}s'.format(count + 1, num_iters, _t['im_detect'].average_time)) 208 | count += 1 209 | 210 | pickle.dump(detection, open(output_file, "wb")) 211 | 212 | 213 | if __name__ == '__main__': 214 | opt = opts().parse() 215 | 216 | human_thres = 0.3 217 | object_thres = 0.1 218 | action_thres = 0.05 219 | 220 | np.random.seed(3) 221 | detection = [] 222 | 223 | DATA_DIR = '/home/wangtiancai/data/vcoco' 224 | 225 | with open(DATA_DIR + '/' + 'prior_mask.pkl', 'rb') as f: 226 | prior_mask = pickle.load(f, encoding='latin1') 227 | with open(DATA_DIR + '/' + 'Test_Faster_RCNN_R-50-PFN_2x_VCOCO.pkl', 'rb') as f: 228 | Test_RCNN = pickle.load(f, encoding='latin1') 229 | 230 | Action_dic = json.load(open(DATA_DIR + '/' + 'action_index.json')) 231 | Action_dic_inv = {y: x for x, y in Action_dic.items()} 232 | 233 | ROOT_DIR = '/home/wangtiancai/data/vcoco/' 234 | 235 | output_file = ROOT_DIR + '/Results/' + 'SS' + '_' + 'HOI' + '.pkl' 236 | 237 | vcocoeval = VCOCOeval(DATA_DIR + '/' + 'vcoco_test.json', 238 | DATA_DIR + '/' + 'instances_vcoco_all_2014.json', 239 | DATA_DIR + '/'
+ 'vcoco_test.ids') 240 | 241 | test(opt, Test_RCNN, prior_mask, Action_dic_inv, output_file, human_thres, object_thres, action_thres, detection) 242 | 243 | vcocoeval._do_eval(output_file, ovr_thresh=0.5) 244 | 245 | -------------------------------------------------------------------------------- /src/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /src/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, '../lib') 12 | add_path(lib_path) 13 | -------------------------------------------------------------------------------- /src/tools/convert_hourglass_weight.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | MODEL_PATH = '../../models/ExtremeNet_500000.pkl' 6 | OUT_PATH = '../../models/ExtremeNet_500000.pth' 7 | 8 | import torch 9 | state_dict = torch.load(MODEL_PATH) 10 | key_map = {'t_heats': 'hm_t', 'l_heats': 'hm_l', 'b_heats': 'hm_b', \ 11 | 'r_heats': 'hm_r', 'ct_heats': 'hm_c', \ 12 | 't_regrs': 'reg_t', 'l_regrs': 'reg_l', \ 13 | 'b_regrs': 'reg_b', 'r_regrs': 'reg_r'} 14 | 15 | out = {} 16 | for k in state_dict.keys(): 17 | changed = False 18 | for m in key_map.keys(): 19 | if m in k: 20 | if 'ct_heats' in k and m == 't_heats': 21 | continue 22 | new_k = k.replace(m, key_map[m]) 23 | out[new_k] = state_dict[k] 24 | changed = True 25 | print('replace {} with {}'.format(k, new_k)) 26 | if not changed: 27 | out[k] = state_dict[k] 28 | data = {'epoch': 0, 29 | 'state_dict': out} 30 | torch.save(data, OUT_PATH) 31 | -------------------------------------------------------------------------------- /src/tools/eval_coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir,
'../../data/coco/annotations/instances_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "bbox") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | -------------------------------------------------------------------------------- /src/tools/eval_coco_hp.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import sys 8 | import cv2 9 | import numpy as np 10 | import pickle 11 | import os 12 | 13 | this_dir = os.path.dirname(__file__) 14 | ANN_PATH = os.path.join(this_dir, '../../data/coco/annotations/person_keypoints_val2017.json') 15 | print(ANN_PATH) 16 | if __name__ == '__main__': 17 | pred_path = sys.argv[1] 18 | coco = coco.COCO(ANN_PATH) 19 | dets = coco.loadRes(pred_path) 20 | img_ids = coco.getImgIds() 21 | num_images = len(img_ids) 22 | coco_eval = COCOeval(coco, dets, "keypoints") 23 | coco_eval.evaluate() 24 | coco_eval.accumulate() 25 | coco_eval.summarize() 26 | coco_eval = COCOeval(coco, dets, "bbox") 27 | coco_eval.evaluate() 28 | coco_eval.accumulate() 29 | coco_eval.summarize() 30 | 31 | -------------------------------------------------------------------------------- /src/tools/reval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -------------------------------------------------------- 4 | # Fast R-CNN 5 | # Copyright (c) 2015 Microsoft 6 | # Licensed under The MIT License [see LICENSE for details] 7 | # Written by Ross Girshick 8 | # Modified by Xingyi Zhou 9 | # -------------------------------------------------------- 10 | 11 | # Reval = re-eval. Re-evaluate saved detections.
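# Example invocation (the detection-file path is a placeholder):
#   python reval.py /path/to/detections.pkl --imdb voc_2007_test --nms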
12 | from __future__ import absolute_import 13 | from __future__ import division 14 | from __future__ import print_function 15 | 16 | import sys 17 | import os.path as osp 18 | sys.path.insert(0, osp.join(osp.dirname(__file__), 'voc_eval_lib')) 19 | 20 | from model.test import apply_nms 21 | from datasets.pascal_voc import pascal_voc 22 | import pickle 23 | import os, argparse 24 | import numpy as np 25 | import json 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Re-evaluate results') 32 | parser.add_argument('detection_file', type=str) 33 | parser.add_argument('--output_dir', help='results directory', type=str) 34 | parser.add_argument('--imdb', dest='imdb_name', 35 | help='dataset to re-evaluate', 36 | default='voc_2007_test', type=str) 37 | parser.add_argument('--matlab', dest='matlab_eval', 38 | help='use matlab for evaluation', 39 | action='store_true') 40 | parser.add_argument('--comp', dest='comp_mode', help='competition mode', 41 | action='store_true') 42 | parser.add_argument('--nms', dest='apply_nms', help='apply nms', 43 | action='store_true') 44 | 45 | if len(sys.argv) == 1: 46 | parser.print_help() 47 | sys.exit(1) 48 | 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def from_dets(imdb_name, detection_file, args): 54 | imdb = pascal_voc('test', '2007') 55 | imdb.competition_mode(args.comp_mode) 56 | imdb.config['matlab_eval'] = args.matlab_eval 57 | with open(os.path.join(detection_file), 'rb') as f: 58 | if 'json' in detection_file: 59 | dets = json.load(f) 60 | else: 61 | dets = pickle.load(f, encoding='latin1') 62 | # import pdb; pdb.set_trace() 63 | if args.apply_nms: 64 | print('Applying NMS to all detections') 65 | test_nms = 0.3 66 | nms_dets = apply_nms(dets, test_nms) 67 | else: 68 | nms_dets = dets 69 | 70 | print('Evaluating detections') 71 | imdb.evaluate_detections(nms_dets) 72 | 73 | 74 | if __name__ == '__main__': 75 | args = parse_args() 76 | 77 | imdb_name = args.imdb_name 78 | from_dets(imdb_name, args.detection_file, args) 79 | -------------------------------------------------------------------------------- /src/tools/vis_pred.py: -------------------------------------------------------------------------------- 1 | import pycocotools.coco as coco 2 | from pycocotools.cocoeval import COCOeval 3 | import sys 4 | import cv2 5 | import numpy as np 6 | import pickle 7 | IMG_PATH = '../../data/coco/val2017/' 8 | ANN_PATH = '../../data/coco/annotations/instances_val2017.json' 9 | DEBUG = True 10 | 11 | def _coco_box_to_bbox(box): 12 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 13 | dtype=np.int32) 14 | return bbox 15 | 16 | _cat_ids = [ 17 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 18 | 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 19 | 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 20 | 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 21 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 22 | 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 23 | 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 24 | 82, 84, 85, 86, 87, 88, 89, 90 25 | ] 26 | num_classes = 80 27 | _classes = { 28 | ind + 1: cat_id for ind, cat_id in enumerate(_cat_ids) 29 | } 30 | _to_order = {cat_id: ind for ind, cat_id in enumerate(_cat_ids)} 31 | coco = coco.COCO(ANN_PATH) 32 | CAT_NAMES = [coco.loadCats([_classes[i + 1]])[0]['name'] \ 33 | for i in range(num_classes)] 34 | COLORS = [((np.random.random((3, )) * 0.6 + 0.4)*255).astype(np.uint8) \ 35 | for _ in range(num_classes)] 36 | 37 | 38 | def add_box(image, bbox, sc, cat_id): 
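# Draws one detection on `image`: a filled label strip with the class name and
# its score (rendered as score * 10, rounded), plus the box outline. `bbox` is
# expected as integer [x1, y1, x2, y2] pixel coordinates; the label strip is
# drawn below the box's top edge whenever it would otherwise leave the image.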
39 | cat_id = _to_order[cat_id] 40 | cat_name = CAT_NAMES[cat_id] 41 | cat_size = cv2.getTextSize(cat_name + '0', cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] 42 | color = np.array(COLORS[cat_id]).astype(np.int32).tolist() 43 | txt = '{}{:.0f}'.format(cat_name, sc * 10) 44 | if bbox[1] - cat_size[1] - 2 < 0: 45 | cv2.rectangle(image, 46 | (bbox[0], bbox[1] + 2), 47 | (bbox[0] + cat_size[0], bbox[1] + cat_size[1] + 2), 48 | color, -1) 49 | cv2.putText(image, txt, 50 | (bbox[0], bbox[1] + cat_size[1] + 2), 51 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 52 | else: 53 | cv2.rectangle(image, 54 | (bbox[0], bbox[1] - cat_size[1] - 2), 55 | (bbox[0] + cat_size[0], bbox[1] - 2), 56 | color, -1) 57 | cv2.putText(image, txt, 58 | (bbox[0], bbox[1] - 2), 59 | cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), thickness=1) 60 | cv2.rectangle(image, 61 | (bbox[0], bbox[1]), 62 | (bbox[2], bbox[3]), 63 | color, 2) 64 | return image 65 | 66 | if __name__ == '__main__': 67 | dets = [] 68 | img_ids = coco.getImgIds() 69 | num_images = len(img_ids) 70 | for k in range(1, len(sys.argv)): 71 | pred_path = sys.argv[k] 72 | dets.append(coco.loadRes(pred_path)) 73 | # import pdb; pdb.set_trace() 74 | for i, img_id in enumerate(img_ids): 75 | img_info = coco.loadImgs(ids=[img_id])[0] 76 | img_path = IMG_PATH + img_info['file_name'] 77 | img = cv2.imread(img_path) 78 | gt_ids = coco.getAnnIds(imgIds=[img_id]) 79 | gts = coco.loadAnns(gt_ids) 80 | gt_img = img.copy() 81 | for j, pred in enumerate(gts): 82 | bbox = _coco_box_to_bbox(pred['bbox']) 83 | cat_id = pred['category_id'] 84 | gt_img = add_box(gt_img, bbox, 0, cat_id) 85 | for k in range(len(dets)): 86 | pred_ids = dets[k].getAnnIds(imgIds=[img_id]) 87 | preds = dets[k].loadAnns(pred_ids) 88 | pred_img = img.copy() 89 | for j, pred in enumerate(preds): 90 | bbox = _coco_box_to_bbox(pred['bbox']) 91 | sc = pred['score'] 92 | cat_id = pred['category_id'] 93 | if sc > 0.2: 94 | pred_img = add_box(pred_img, bbox, sc, cat_id) 95 | cv2.imshow('pred{}'.format(k), pred_img) 96 | # cv2.imwrite('vis/{}_pred{}.png'.format(i, k), pred_img) 97 | cv2.imshow('gt', gt_img) 98 | # cv2.imwrite('vis/{}_gt.png'.format(i), gt_img) 99 | cv2.waitKey() 100 | # coco_eval.evaluate() 101 | # coco_eval.accumulate() 102 | # coco_eval.summarize() 103 | 104 | 105 | --------------------------------------------------------------------------------