├── .gitignore
├── README.md
├── experiments
│   └── lizeming
│       ├── light_head_rcnn.ori_res101.coco.ps_roialign
│       │   ├── config.py
│       │   ├── dataset.py
│       │   ├── network_desp.py
│       │   └── test.py
│       ├── light_head_rcnn.ori_res101.coco
│       │   ├── config.py
│       │   ├── dataset.py
│       │   ├── network_desp.py
│       │   └── test.py
│       └── rfcn_reproduce.ori_res101.coco.baseline
│           ├── config.py
│           ├── dataset.py
│           ├── network_desp.py
│           └── test.py
├── lib
│   ├── __init__.py
│   ├── datasets_odgt
│   │   ├── __init__.py
│   │   ├── coco.py
│   │   ├── cocoval.py
│   │   └── lib_coco
│   │       ├── PythonAPI
│   │       │   ├── Makefile
│   │       │   ├── pycocoDemo.ipynb
│   │       │   ├── pycocoEvalDemo.ipynb
│   │       │   ├── pycocotools
│   │       │   │   ├── __init__.py
│   │       │   │   ├── _mask.c
│   │       │   │   ├── _mask.pyx
│   │       │   │   ├── coco.py
│   │       │   │   ├── cocoeval.py
│   │       │   │   └── mask.py
│   │       │   └── setup.py
│   │       └── common
│   │           ├── gason.cpp
│   │           ├── gason.h
│   │           ├── maskApi.c
│   │           └── maskApi.h
│   ├── detection_opr
│   │   ├── __init__.py
│   │   ├── box_utils
│   │   │   ├── __init__.py
│   │   │   ├── bbox.c
│   │   │   ├── bbox.pyx
│   │   │   ├── bbox_opr.py
│   │   │   ├── bbox_transform.py
│   │   │   ├── bbox_transform_opr.py
│   │   │   ├── box.py
│   │   │   └── setup.py
│   │   ├── rfcn_plus_plus
│   │   │   ├── __init__.py
│   │   │   └── rfcn_plus_plus_opr.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── anchor_target_layer_without_boxweight.py
│   │   │   ├── generate_anchors.py
│   │   │   ├── proposal_layer.py
│   │   │   ├── proposal_target_layer.py
│   │   │   └── snippets.py
│   │   ├── rpn_batched
│   │   │   ├── __init__.py
│   │   │   ├── anchor_target_layer_without_boxweight.py
│   │   │   ├── proposal_opr.py
│   │   │   └── proposal_target_layer.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── bbox_transform.py
│   │       ├── loss_opr.py
│   │       ├── loss_opr_without_box_weight.py
│   │       ├── nms_wrapper.py
│   │       └── vis_det.py
│   ├── lib_kernel
│   │   ├── __init__.py
│   │   ├── lib_nms_dev
│   │   │   ├── make.sh
│   │   │   ├── nms_op.cc
│   │   │   ├── nms_op.cu.cc
│   │   │   ├── nms_op.h
│   │   │   ├── nms_op.py
│   │   │   └── nms_test.py
│   │   ├── lib_psalign_pooling
│   │   │   ├── __init__.py
│   │   │   ├── make.sh
│   │   │   ├── psalign_pooling_op.cc
│   │   │   ├── psalign_pooling_op.py
│   │   │   ├── psalign_pooling_op_gpu.cu.cc
│   │   │   ├── psalign_pooling_op_gpu.h
│   │   │   ├── psalign_pooling_op_grad.py
│   │   │   └── psalign_pooling_op_test.py
│   │   ├── lib_psalign_pooling_ave
│   │   │   ├── __init__.py
│   │   │   ├── make.sh
│   │   │   ├── psalign_pooling_op.cc
│   │   │   ├── psalign_pooling_op.py
│   │   │   ├── psalign_pooling_op_gpu.cu.cc
│   │   │   ├── psalign_pooling_op_gpu.h
│   │   │   ├── psalign_pooling_op_grad.py
│   │   │   └── psalign_pooling_op_test.py
│   │   ├── lib_psroi_pooling
│   │   │   ├── __init__.py
│   │   │   ├── make.sh
│   │   │   ├── psroi_pooling_op.cc
│   │   │   ├── psroi_pooling_op.py
│   │   │   ├── psroi_pooling_op_gpu.cu.cc
│   │   │   ├── psroi_pooling_op_gpu.h
│   │   │   ├── psroi_pooling_op_grad.py
│   │   │   └── psroi_pooling_op_test.py
│   │   ├── lib_roi_align
│   │   │   ├── Readme
│   │   │   ├── __init__.py
│   │   │   ├── lzm_roi_align_op_test.py
│   │   │   ├── make.sh
│   │   │   ├── roi_align_op.cc
│   │   │   ├── roi_align_op.py
│   │   │   ├── roi_align_op_gpu.cu.cc
│   │   │   ├── roi_align_op_gpu.h
│   │   │   ├── roi_align_op_grad.py
│   │   │   └── roi_align_op_test.py
│   │   └── lib_roi_pooling
│   │       ├── Readme
│   │       ├── __init__.py
│   │       ├── make.sh
│   │       ├── roi_pooling_op.cc
│   │       ├── roi_pooling_op.py
│   │       ├── roi_pooling_op_gpu.cu.cc
│   │       ├── roi_pooling_op_gpu.h
│   │       ├── roi_pooling_op_grad.py
│   │       └── roi_pooling_op_test.py
│   ├── make.sh
│   └── utils
│       ├── __init__.py
│       ├── dpflow
│       │   ├── __init__.py
│       │   ├── data_provider.py
│       │   ├── dpflow.py
│       │   ├── prefetching_iter.py
│       │   └── serialize.py
│       ├── py_faster_rcnn_utils
│       │   ├── Makefile
│       │   ├── __init__.py
│       │   ├── bbox.c
│       │   ├── bbox.pyx
│       │   ├── blob.py
│       │   ├── boxes_grid.py
│       │   ├── nms.c
│       │   ├── nms.py
│       │   ├── nms.pyx
│       │   ├── setup.py
│       │   └── timer.py
│       ├── py_utils
│       │   ├── __init__.py
│       │   ├── logger.py
│       │   └── misc.py
│       └── tf_utils
│           ├── __init__.py
│           ├── basemodel
│           │   ├── __init__.py
│           │   ├── resnet_utils.py
│           │   └── resnet_v1.py
│           ├── debug_opr.py
│           ├── lr_policy.py
│           ├── model_helper.py
│           └── model_parallel.py
└── tools
    ├── __init__.py
    └── train.py

/.gitignore:
--------------------------------------------------------------------------------
1 | *.so
2 | *.o
3 | *.pyc
4 | *log
5 | output
6 | tensorboard
7 | lib/build
8 | lib/pycocotools
9 | lib/pycocotools/_mask.c
10 | lib/pycocotools/_mask.so
11 | .idea
12 | data
13 | .DS_Store
14 | .DS_Store?
15 | *.avi
16 | *.mp4
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Light-head R-CNN
2 | 
3 | 
4 | ## Introduction
5 | We release code for [Light-Head R-CNN](https://arxiv.org/abs/1711.07264).
6 | 
7 | 
8 | 
9 | This is the best practice from my research.
10 | 
11 | This repo is organized as follows:
12 | 
13 | ```
14 | light_head_rcnn/
15 | |->experiments
16 | |    |->user
17 | |    |    |->your_models
18 | |->lib
19 | |->tools
20 | |->output
21 | ```
22 | 
23 | ## Main Results
24 | 1. We train on COCO trainval, which includes `80k` training images and a `35k` subset of the validation images, and test on minival, a `5k` subset of the validation set (see the evaluation sketch after this list). Note that results on test-dev should be slightly higher than on minival.
25 | 2. We provide details for some crucial ablation experiments, so it is easy to diff the differences between them.
26 | 3. We share our training logs in the [GoogleDrive](https://drive.google.com/open?id=1-Mqj385d_1t4wcmhl25TZO1g-uw5X-xK) output folder, which contains dumped models, the training loss, and the speed of each step. (Experiments are done on 8 Titan Xp GPUs with 2 batches per GPU; training should finish within one day.)
27 | 4. Due to time limitations, extra experiments are coming soon.
28 | 
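All numbers in the table below come from the official COCO evaluation protocol. For reference, here is a minimal sketch of how a dumped detection json can be scored against minival; it mirrors `lib/datasets_odgt/cocoval.py`, and the two file paths are placeholders to adapt to your layout:

```
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# placeholder paths; adjust to your data/output layout
eval_gt = COCO('data/MSCOCO/instances_minival2014.json')
eval_dt = eval_gt.loadRes('output/detections.json')

cocoEval = COCOeval(eval_gt, eval_dt, iouType='bbox')
cocoEval.evaluate()    # match detections to ground truth per image
cocoEval.accumulate()  # aggregate precision/recall over IoU thresholds
cocoEval.summarize()   # print the mAP/mAR entries reported below
```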
29 | | Model Name | mAP@all | mAP@0.5 | mAP@0.75 | mAP@S | mAP@M | mAP@L |
30 | |---|---|---|---|---|---|---|
31 | | R-FCN, ResNet-v1-101<br>our reproduced baseline | 35.5 | 54.3 | 33.8 | 12.8 | 34.9 | 46.1 |
32 | | Light-Head R-CNN<br>ResNet-v1-101 | 38.2 | 60.9 | 41.0 | 20.9 | 42.2 | 52.8 |
33 | | Light-Head, ResNet-v1-101<br>+ align pooling | 39.3 | 61.0 | 42.4 | 22.2 | 43.8 | 53.2 |
34 | | Light-Head, ResNet-v1-101<br>+ align pooling + nms0.5 | 40.0 | 62.1 | 42.9 | 22.5 | 44.6 | 54.0 |
35 | 
36 | Experiments paths related to the models:
37 | 
38 | ```
39 | experiments/lizeming/rfcn_reproduce.ori_res101.coco.baseline
40 | experiments/lizeming/light_head_rcnn.ori_res101.coco
41 | experiments/lizeming/light_head_rcnn.ori_res101.coco.ps_roialign
42 | experiments/lizeming/light_head_rcnn.ori_res101.coco.ps_roialign
43 | ```
44 | 
45 | ## Requirements
46 | 1. tensorflow-gpu==1.5.0 (we have only tested on TensorFlow 1.5.0; earlier TensorFlow versions are not supported because of our GPU NMS implementation; you can verify the installed version with the check below)
47 | 2. python3. We recommend using Anaconda, as it already includes many common packages. (python2 is not tested.)
48 | 3. Some Python packages may still be missing; please install them according to the error messages.
49 | 
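Before compiling, it can help to confirm that the installed TensorFlow matches the pinned version; this quick check is our suggestion here, not part of the original setup steps:

```
python3 -c "import tensorflow as tf; print(tf.__version__)"   # expect 1.5.0
```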
50 | ## Installation, Prepare data, Testing, Training
51 | ### Installation
52 | 1. Clone the Light-Head R-CNN repository. We'll call the directory that you cloned Light-Head R-CNN into `${lighthead_ROOT}`.
53 | 
54 | ```
55 | git clone https://github.com/zengarden/light_head_rcnn
56 | ```
57 | 
58 | 2. Compiling
59 | 
60 | ```
61 | cd ${lighthead_ROOT}/lib;
62 | bash make.sh
63 | ```
64 | 
65 | Make sure all of the compilation steps succeed. If errors arise, the [FAQ](#faq) lists some common compile errors.
66 | 
67 | 3. Create the log dump directory and the data directory.
68 | 
69 | ```
70 | cd ${lighthead_ROOT};
71 | mkdir output
72 | mkdir data
73 | ```
74 | 
75 | ### Prepare data
76 | Data should be organized as follows:
77 | 
78 | ```
79 | data/
80 | |->imagenet_weights/res101.ckpt
81 | |->MSCOCO
82 | | |->odformat
83 | | |->instances_xxx.json
84 | | |train2014
85 | | |val2014
86 | ```
87 | Download the res101 base model:
88 | 
89 | ```
90 | wget -v http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz
91 | tar -xzvf resnet_v1_101_2016_08_28.tar.gz
92 | mv resnet_v1_101.ckpt res101.ckpt
93 | ```
94 | 
95 | We convert instances_xxx.json to odformat (object detection format), in which each line is a JSON annotation for one image. Our converted odformat files are shared in [GoogleDrive](https://drive.google.com/open?id=1-Mqj385d_1t4wcmhl25TZO1g-uw5X-xK) as odformat.zip.
96 | ### Testing
97 | 
98 | 1. Use `-d` to assign the gpu_id for testing. (e.g. `-d 0,1,2,3` or `-d 0-3`)
99 | 2. Use `-s` to visualize the results.
100 | 3. Use `-se` to specify the start_epoch for testing.
101 | 
102 | We share our experiments output (logs) folder in [GoogleDrive](https://drive.google.com/open?id=1-Mqj385d_1t4wcmhl25TZO1g-uw5X-xK). Download it, place it in `${lighthead_ROOT}`, and then test our released models.
103 | 
104 | e.g.
105 | 
106 | ```
107 | cd experiments/lizeming/light_head_rcnn.ori_res101.coco.ps_roialign
108 | python3 test.py -d 0-7 -se 26
109 | ```
110 | 
111 | ### Training
112 | 
113 | We provide the commonly used train.py in tools, which can be linked into an experiment folder.
114 | 
115 | e.g.
116 | ```
117 | cd experiments/lizeming/light_head_rcnn.ori_res101.coco.ps_roialign
118 | python3 config.py -tool
119 | cp tools/train.py .
120 | python3 train.py -d 0-7
121 | ```
122 | 
123 | ## Features
124 | 
125 | This repo is designed to be `fast` and `simple` for research. There is still room for improvement: the anchor_target and proposal_target layers are `tf.py_func` ops, which means they run on the CPU (see the sketch below).
126 | 
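To make the last point concrete, below is a minimal, hypothetical sketch (not the repo's actual layer) of how a NumPy routine is wrapped with `tf.py_func` in TensorFlow 1.x; everything inside the wrapped function runs as ordinary Python on the CPU, outside the compiled graph:

```
import numpy as np
import tensorflow as tf

def _toy_target_np(boxes):
    # stand-in for the real logic in
    # lib/detection_opr/rpn/anchor_target_layer_without_boxweight.py
    return (boxes[:, 2:4] - boxes[:, 0:2]).astype(np.float32)

boxes = tf.placeholder(tf.float32, shape=[None, 4])
# executed by the Python interpreter on the CPU at session run time
sizes = tf.py_func(_toy_target_np, [boxes], tf.float32)
```

Replacing such layers with native ops (as is already done for NMS and PS RoI align under `lib/lib_kernel`) could remove this CPU round trip.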
127 | ## Disclaimer
128 | This is an implementation of [Light-Head R-CNN](https://arxiv.org/abs/1711.07264). It is worth noting that:
129 | 
130 | * The original implementation is based on our internal platform used at Megvii. There are slight differences in the final accuracy and running time due to the many details involved in the platform switch.
131 | * The code is tested on a server with 8 Pascal Titan Xp GPUs, 188 GB of memory, and a 40-core CPU.
132 | * We wrote a faster NMS on our internal platform, while here we use TensorFlow's built-in NMS instead.
133 | 
134 | 
135 | ## Citing Light-Head R-CNN
136 | 
137 | If you find Light-Head R-CNN useful in your research, please consider citing:
138 | 
139 | ```
140 | @article{li2017light,
141 |     title={Light-Head R-CNN: In Defense of Two-Stage Object Detector},
142 |     author={Li, Zeming and Peng, Chao and Yu, Gang and Zhang, Xiangyu and Deng, Yangdong and Sun, Jian},
143 |     journal={arXiv preprint arXiv:1711.07264},
144 |     year={2017}
145 | }
146 | ```
147 | 
148 | ## FAQ
149 | 
150 | * fatal error: cuda/cuda_config.h: No such file or directory
151 | 
152 | First, find where cuda_config.h is located.
153 | 
154 | e.g.
155 | 
156 | ```
157 | find /usr/local/lib/ | grep cuda_config.h
158 | ```
159 | 
160 | then export your CPATH accordingly, e.g.:
161 | 
162 | ```
163 | export CPATH=$CPATH:/usr/local/lib/python3.5/dist-packages/external/local_config_cuda/cuda/
164 | ```
165 | 
--------------------------------------------------------------------------------
/experiments/lizeming/light_head_rcnn.ori_res101.coco.ps_roialign/config.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | from __future__ import absolute_import
7 | from __future__ import division
8 | from __future__ import print_function
9 | 
10 | import os, getpass
11 | import os.path as osp
12 | import numpy as np
13 | import argparse
14 | import sys
15 | 
16 | from easydict import EasyDict as edict
17 | 
18 | def add_path(path):
19 |     if path not in sys.path:
20 |         sys.path.insert(0, path)
21 | 
22 | 
23 | # ------------ please configure ROOT_dir and user when first using ------------#
24 | root_dir = osp.abspath(osp.join(osp.dirname(__file__), '..', '..', '..'))
25 | 
26 | lib_path = osp.join(root_dir, 'lib')
27 | add_path(osp.join(root_dir, 'tools'))
28 | add_path(lib_path)
29 | 
30 | 
31 | class Config:
32 |     user = getpass.getuser()
33 |     # ---------- generate some dirs, e.g.
dump dir, weights dir -------------------# 34 | output_dir = osp.join( 35 | root_dir, 'output', user, 36 | os.path.split(os.path.split(os.path.realpath(__file__))[0])[1]) 37 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 38 | eval_dir = osp.join(output_dir, 'eval_dump') 39 | tb_dir = osp.join(output_dir, 'tfboard_dump') 40 | weight = osp.join(root_dir, 'data/imagenet_weights/res101.ckpt') 41 | 42 | program_name = user + ":" + os.path.split( 43 | os.path.split(os.path.realpath(__file__))[0])[1] 44 | 45 | # ------------------- Data configuration --------------------------------------# 46 | 47 | from datasets_odgt.coco import COCO as datadb 48 | 49 | image_mean = np.array([102.9801, 115.9465, 122.7717]) 50 | # C.image_mean = np.array([122.7717, 102.9801, 115.9465]) 51 | seed_dataprovider = 3 52 | nr_dataflow = 16 53 | 54 | datadb = datadb 55 | class_num = datadb.num_classes 56 | num_classes = datadb.num_classes 57 | class_names = datadb.class_names 58 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 59 | 60 | batch_image_preprocess = 'pad' 61 | train_root_folder = os.path.join(root_dir, 'data/MSCOCO') 62 | train_source = os.path.join( 63 | root_dir, 'data', 'MSCOCO/odformat/coco_trainvalmini.odgt') 64 | 65 | eval_root_folder = os.path.join(root_dir, 'data/MSCOCO') 66 | eval_source = os.path.join( 67 | root_dir, 'data', 'MSCOCO/odformat/coco_minival2014.odgt') 68 | eval_json = os.path.join( 69 | root_dir, 'data', 'MSCOCO/instances_minival2014.json') 70 | 71 | filter_gt_ignore_label = True 72 | train_gt_ignore_label = False 73 | 74 | image_short_size = 800 75 | image_max_size = 1333 76 | eval_resize = True 77 | eval_image_short_size = 800 78 | eval_image_max_size = 1333 79 | 80 | test_max_boxes_per_image = 100 81 | test_cls_threshold = 0.00 82 | test_vis_threshold = 0.5 83 | test_nms = 0.5 84 | test_save_type = 'coco' 85 | 86 | batch_filter_box_size = 0 87 | max_boxes_of_image = 100 88 | nr_box_dim = 5 89 | nr_info_dim = 6 90 | 91 | stride = [16] 92 | anchor_scales = [2, 4, 8, 16, 32] 93 | anchor_ratios = [0.5, 1, 2] 94 | simga_rpn = 3 95 | 96 | rng_seed = 3 97 | EPS = 1e-14 98 | 99 | # ------------------------------------ TRAIN config -----------------------# 100 | train_batch_per_gpu = 2 101 | test_batch_per_gpu = 1 102 | bn_training = False 103 | tb_dump_interval = 500 104 | nr_image_per_epoch = 80000 # detectron 1x setting 105 | basic_lr = 5e-4 * train_batch_per_gpu * 1.25 106 | momentum = 0.9 107 | weight_decay = 0.0001 108 | 109 | from utils.tf_utils import lr_policy 110 | max_epoch = 30 111 | warm_iter = 500 112 | warm_fractor = 1.0 / 3.0 113 | multi_stage_lr_policy = lr_policy.MultiStageLR( 114 | [[19, basic_lr], [25, basic_lr * 0.1], [30, basic_lr * 0.01]]) 115 | 116 | def get_lr(self, epoch): 117 | return self.multi_stage_lr_policy.get_lr(epoch) 118 | 119 | # -----------------------------traditional rcnn config --------------------# 120 | TRAIN = edict() 121 | 122 | TRAIN.HAS_RPN = True 123 | TRAIN.DOUBLE_BIAS = False 124 | TRAIN.BIAS_DECAY = False 125 | TRAIN.USE_GT = True 126 | TRAIN.TRUNCATED = False 127 | TRAIN.ASPECT_GROUPING = True 128 | 129 | TRAIN.FG_FRACTION = 0.25 130 | TRAIN.FG_THRESH = 0.5 131 | TRAIN.BG_THRESH_HI = 0.5 132 | TRAIN.BG_THRESH_LO = 0.0 133 | TRAIN.BBOX_REG = True 134 | TRAIN.BBOX_THRESH = 0.5 135 | 136 | TRAIN.BATCH_SIZE = -1 # rcnn batch size 137 | TRAIN.nr_ohem_sampling = 256 * train_batch_per_gpu 138 | 139 | TRAIN.BBOX_NORMALIZE_TARGETS = True 140 | # Deprecated (inside weights) 141 | TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 
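# each entry weights one (dx, dy, dw, dh) term of the bbox regression loss in the Fast R-CNN convention; all ones keep every term (deprecated, per the comment above)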
1.0, 1.0, 1.0) 142 | TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True 143 | TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 144 | TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 145 | 146 | TRAIN.RPN_NORMALIZE_TARGETS = False 147 | TRAIN.RPN_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 148 | TRAIN.RPN_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 149 | 150 | # IOU >= thresh: positive example 151 | TRAIN.RPN_POSITIVE_OVERLAP = 0.7 152 | TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 153 | TRAIN.RPN_CLOBBER_POSITIVES = False 154 | TRAIN.RPN_FG_FRACTION = 0.5 155 | TRAIN.RPN_BATCHSIZE = 256 156 | TRAIN.RPN_NMS_THRESH = 0.7 157 | # __C.TRAIN.RPN_MIN_SIZE = 16 158 | TRAIN.USE_ALL_GT = True 159 | TRAIN.RPN_PRE_NMS_TOP_N = 12000 160 | TRAIN.RPN_POST_NMS_TOP_N = 2000 161 | 162 | TEST = edict() 163 | TEST.BBOX_REG = True 164 | TEST.HAS_RPN = True 165 | TEST.RPN_NMS_THRESH = 0.7 166 | TEST.RPN_PRE_NMS_TOP_N = 6000 167 | TEST.RPN_POST_NMS_TOP_N = 1000 168 | 169 | 170 | config = Config() 171 | cfg = config 172 | 173 | 174 | def link_log_dir(): 175 | if not os.path.exists(osp.join(config.this_model_dir, 'log')): 176 | cmd = "ln -s " + config.output_dir + " log" 177 | os.system(cmd) 178 | 179 | 180 | def link_tools_dir(): 181 | if not os.path.exists(osp.join(config.this_model_dir, 'tools')): 182 | cmd = "ln -s " + os.path.join(root_dir, 'tools') + " tools" 183 | os.system(cmd) 184 | 185 | def make_parser(): 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument( 188 | '-log', '--linklog', default=False, action='store_true') 189 | parser.add_argument( 190 | '-tool', '--link_tools', default=False, action='store_true') 191 | 192 | return parser 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = make_parser() 197 | args = parser.parse_args() 198 | if args.linklog: 199 | link_log_dir() 200 | if args.link_tools: 201 | link_tools_dir() 202 | -------------------------------------------------------------------------------- /experiments/lizeming/light_head_rcnn.ori_res101.coco/config.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os, getpass 11 | import os.path as osp 12 | import numpy as np 13 | import argparse 14 | import sys 15 | 16 | from easydict import EasyDict as edict 17 | 18 | def add_path(path): 19 | if path not in sys.path: 20 | sys.path.insert(0, path) 21 | 22 | 23 | # ------------ please config ROOT_dir and user when u first using -------------# 24 | root_dir = osp.abspath(osp.join(osp.dirname(__file__), '..', '..', '..')) 25 | 26 | lib_path = osp.join(root_dir, 'lib') 27 | add_path(osp.join(root_dir, 'tools')) 28 | add_path(lib_path) 29 | 30 | 31 | class Config: 32 | user = getpass.getuser() 33 | # ---------- generate some dirs, e.g. 
dump dir, weights dir -------------------# 34 | output_dir = osp.join( 35 | root_dir, 'output', user, 36 | os.path.split(os.path.split(os.path.realpath(__file__))[0])[1]) 37 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 38 | eval_dir = osp.join(output_dir, 'eval_dump') 39 | tb_dir = osp.join(output_dir, 'tfboard_dump') 40 | weight = osp.join(root_dir, 'data/imagenet_weights/res101.ckpt') 41 | 42 | program_name = user + ":" + os.path.split( 43 | os.path.split(os.path.realpath(__file__))[0])[1] 44 | 45 | # ------------------- Data configuration --------------------------------------# 46 | 47 | from datasets_odgt.coco import COCO as datadb 48 | 49 | image_mean = np.array([102.9801, 115.9465, 122.7717]) 50 | # C.image_mean = np.array([122.7717, 102.9801, 115.9465]) 51 | seed_dataprovider = 3 52 | nr_dataflow = 16 53 | 54 | datadb = datadb 55 | class_num = datadb.num_classes 56 | num_classes = datadb.num_classes 57 | class_names = datadb.class_names 58 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 59 | 60 | batch_image_preprocess = 'pad' 61 | train_root_folder = os.path.join(root_dir, 'data/MSCOCO') 62 | train_source = os.path.join( 63 | root_dir, 'data', 'MSCOCO/odformat/coco_trainvalmini.odgt') 64 | 65 | eval_root_folder = os.path.join(root_dir, 'data/MSCOCO') 66 | eval_source = os.path.join( 67 | root_dir, 'data', 'MSCOCO/odformat/coco_minival2014.odgt') 68 | eval_json = os.path.join( 69 | root_dir, 'data', 'MSCOCO/instances_minival2014.json') 70 | 71 | filter_gt_ignore_label = True 72 | train_gt_ignore_label = False 73 | 74 | image_short_size = 800 75 | image_max_size = 1333 76 | eval_resize = True 77 | eval_image_short_size = 800 78 | eval_image_max_size = 1333 79 | 80 | test_max_boxes_per_image = 100 81 | test_cls_threshold = 0.00 82 | test_vis_threshold = 0.5 83 | test_nms = 0.3 84 | test_save_type = 'coco' 85 | 86 | batch_filter_box_size = 0 87 | max_boxes_of_image = 100 88 | nr_box_dim = 5 89 | nr_info_dim = 6 90 | 91 | stride = [16] 92 | anchor_scales = [2, 4, 8, 16, 32] 93 | anchor_ratios = [0.5, 1, 2] 94 | simga_rpn = 3 95 | 96 | rng_seed = 3 97 | EPS = 1e-14 98 | 99 | # ------------------------------------ TRAIN config -----------------------# 100 | train_batch_per_gpu = 2 101 | test_batch_per_gpu = 1 102 | bn_training = False 103 | tb_dump_interval = 500 104 | nr_image_per_epoch = 80000 # detectron 1x setting 105 | basic_lr = 5e-4 * train_batch_per_gpu * 1.25 106 | momentum = 0.9 107 | weight_decay = 0.0001 108 | 109 | from utils.tf_utils import lr_policy 110 | max_epoch = 30 111 | warm_iter = 500 112 | warm_fractor = 1.0 / 3.0 113 | multi_stage_lr_policy = lr_policy.MultiStageLR( 114 | [[19, basic_lr], [25, basic_lr * 0.1], [30, basic_lr * 0.01]]) 115 | 116 | def get_lr(self, epoch): 117 | return self.multi_stage_lr_policy.get_lr(epoch) 118 | 119 | # -----------------------------traditional rcnn config --------------------# 120 | TRAIN = edict() 121 | 122 | TRAIN.HAS_RPN = True 123 | TRAIN.DOUBLE_BIAS = False 124 | TRAIN.BIAS_DECAY = False 125 | TRAIN.USE_GT = True 126 | TRAIN.TRUNCATED = False 127 | TRAIN.ASPECT_GROUPING = True 128 | 129 | TRAIN.FG_FRACTION = 0.25 130 | TRAIN.FG_THRESH = 0.5 131 | TRAIN.BG_THRESH_HI = 0.5 132 | TRAIN.BG_THRESH_LO = 0.0 133 | TRAIN.BBOX_REG = True 134 | TRAIN.BBOX_THRESH = 0.5 135 | 136 | TRAIN.BATCH_SIZE = -1 # rcnn batch size 137 | TRAIN.nr_ohem_sampling = 256 * train_batch_per_gpu 138 | 139 | TRAIN.BBOX_NORMALIZE_TARGETS = True 140 | # Deprecated (inside weights) 141 | TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 
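# each entry weights one (dx, dy, dw, dh) term of the bbox regression loss in the Fast R-CNN convention; all ones keep every term (deprecated, per the comment above)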
1.0, 1.0, 1.0) 142 | TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True 143 | TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 144 | TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 145 | 146 | TRAIN.RPN_NORMALIZE_TARGETS = False 147 | TRAIN.RPN_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 148 | TRAIN.RPN_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 149 | 150 | # IOU >= thresh: positive example 151 | TRAIN.RPN_POSITIVE_OVERLAP = 0.7 152 | TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 153 | TRAIN.RPN_CLOBBER_POSITIVES = False 154 | TRAIN.RPN_FG_FRACTION = 0.5 155 | TRAIN.RPN_BATCHSIZE = 256 156 | TRAIN.RPN_NMS_THRESH = 0.7 157 | # __C.TRAIN.RPN_MIN_SIZE = 16 158 | TRAIN.USE_ALL_GT = True 159 | TRAIN.RPN_PRE_NMS_TOP_N = 12000 160 | TRAIN.RPN_POST_NMS_TOP_N = 2000 161 | 162 | TEST = edict() 163 | TEST.BBOX_REG = True 164 | TEST.HAS_RPN = True 165 | TEST.RPN_NMS_THRESH = 0.7 166 | TEST.RPN_PRE_NMS_TOP_N = 6000 167 | TEST.RPN_POST_NMS_TOP_N = 1000 168 | 169 | 170 | config = Config() 171 | cfg = config 172 | 173 | 174 | def link_log_dir(): 175 | if not os.path.exists(osp.join(config.this_model_dir, 'log')): 176 | cmd = "ln -s " + config.output_dir + " log" 177 | os.system(cmd) 178 | 179 | 180 | def link_tools_dir(): 181 | if not os.path.exists(osp.join(config.this_model_dir, 'tools')): 182 | cmd = "ln -s " + os.path.join(root_dir, 'tools') + " tools" 183 | os.system(cmd) 184 | 185 | def make_parser(): 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument( 188 | '-log', '--linklog', default=False, action='store_true') 189 | parser.add_argument( 190 | '-tool', '--link_tools', default=False, action='store_true') 191 | 192 | return parser 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = make_parser() 197 | args = parser.parse_args() 198 | if args.linklog: 199 | link_log_dir() 200 | if args.link_tools: 201 | link_tools_dir() 202 | -------------------------------------------------------------------------------- /experiments/lizeming/rfcn_reproduce.ori_res101.coco.baseline/config.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os, getpass 11 | import os.path as osp 12 | import numpy as np 13 | import argparse 14 | import sys 15 | 16 | from easydict import EasyDict as edict 17 | 18 | def add_path(path): 19 | if path not in sys.path: 20 | sys.path.insert(0, path) 21 | 22 | 23 | # ------------ please config ROOT_dir and user when u first using -------------# 24 | root_dir = osp.abspath(osp.join(osp.dirname(__file__), '..', '..', '..')) 25 | 26 | lib_path = osp.join(root_dir, 'lib') 27 | add_path(osp.join(root_dir, 'tools')) 28 | add_path(lib_path) 29 | 30 | 31 | class Config: 32 | user = getpass.getuser() 33 | # ---------- generate some dirs, e.g. 
dump dir, weights dir -------------------# 34 | output_dir = osp.join( 35 | root_dir, 'output', user, 36 | os.path.split(os.path.split(os.path.realpath(__file__))[0])[1]) 37 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 38 | eval_dir = osp.join(output_dir, 'eval_dump') 39 | tb_dir = osp.join(output_dir, 'tfboard_dump') 40 | weight = osp.join(root_dir, 'data/imagenet_weights/res101.ckpt') 41 | 42 | program_name = user + ":" + os.path.split( 43 | os.path.split(os.path.realpath(__file__))[0])[1] 44 | 45 | # ------------------- Data configuration --------------------------------------# 46 | 47 | from datasets_odgt.coco import COCO as datadb 48 | 49 | image_mean = np.array([102.9801, 115.9465, 122.7717]) 50 | # C.image_mean = np.array([122.7717, 102.9801, 115.9465]) 51 | seed_dataprovider = 3 52 | nr_dataflow = 16 53 | 54 | datadb = datadb 55 | class_num = datadb.num_classes 56 | num_classes = datadb.num_classes 57 | class_names = datadb.class_names 58 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 59 | 60 | batch_image_preprocess = 'pad' 61 | train_root_folder = os.path.join(root_dir, 'data/MSCOCO') 62 | train_source = os.path.join( 63 | root_dir, 'data', 'MSCOCO/odformat/coco_trainvalmini.odgt') 64 | 65 | eval_root_folder = os.path.join(root_dir, 'data/MSCOCO') 66 | eval_source = os.path.join( 67 | root_dir, 'data', 'MSCOCO/odformat/coco_minival2014.odgt') 68 | eval_json = os.path.join( 69 | root_dir, 'data', 'MSCOCO/instances_minival2014.json') 70 | 71 | filter_gt_ignore_label = True 72 | train_gt_ignore_label = False 73 | 74 | image_short_size = 800 75 | image_max_size = 1333 76 | eval_resize = True 77 | eval_image_short_size = 800 78 | eval_image_max_size = 1333 79 | 80 | test_max_boxes_per_image = 100 81 | test_cls_threshold = 0.00 82 | test_vis_threshold = 0.5 83 | test_nms = 0.3 84 | test_save_type = 'coco' 85 | 86 | batch_filter_box_size = 0 87 | max_boxes_of_image = 100 88 | nr_box_dim = 5 89 | nr_info_dim = 6 90 | 91 | stride = [16] 92 | anchor_scales = [2, 4, 8, 16, 32] 93 | anchor_ratios = [0.5, 1, 2] 94 | simga_rpn = 3 95 | 96 | rng_seed = 3 97 | EPS = 1e-14 98 | 99 | # ------------------------------------ TRAIN config -----------------------# 100 | train_batch_per_gpu = 2 101 | test_batch_per_gpu = 1 102 | bn_training = False 103 | tb_dump_interval = 500 104 | nr_image_per_epoch = 80000 # detectron 1x setting 105 | basic_lr = 5e-4 * train_batch_per_gpu * 1.25 106 | momentum = 0.9 107 | weight_decay = 0.0001 108 | 109 | from utils.tf_utils import lr_policy 110 | max_epoch = 30 111 | warm_iter = 500 112 | warm_fractor = 1.0 / 3.0 113 | multi_stage_lr_policy = lr_policy.MultiStageLR( 114 | [[19, basic_lr], [25, basic_lr * 0.1], [30, basic_lr * 0.01]]) 115 | 116 | def get_lr(self, epoch): 117 | return self.multi_stage_lr_policy.get_lr(epoch) 118 | 119 | # -----------------------------traditional rcnn config --------------------# 120 | TRAIN = edict() 121 | 122 | TRAIN.HAS_RPN = True 123 | TRAIN.DOUBLE_BIAS = False 124 | TRAIN.BIAS_DECAY = False 125 | TRAIN.USE_GT = True 126 | TRAIN.TRUNCATED = False 127 | TRAIN.ASPECT_GROUPING = True 128 | 129 | TRAIN.FG_FRACTION = 0.25 130 | TRAIN.FG_THRESH = 0.5 131 | TRAIN.BG_THRESH_HI = 0.5 132 | TRAIN.BG_THRESH_LO = 0.0 133 | TRAIN.BBOX_REG = True 134 | TRAIN.BBOX_THRESH = 0.5 135 | 136 | TRAIN.BATCH_SIZE = -1 # rcnn batch size 137 | TRAIN.nr_ohem_sampling = 256 * train_batch_per_gpu 138 | 139 | TRAIN.BBOX_NORMALIZE_TARGETS = True 140 | # Deprecated (inside weights) 141 | TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 
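# each entry weights one (dx, dy, dw, dh) term of the bbox regression loss in the Fast R-CNN convention; all ones keep every term (deprecated, per the comment above)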
1.0, 1.0, 1.0) 142 | TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True 143 | TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 144 | TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 145 | 146 | TRAIN.RPN_NORMALIZE_TARGETS = False 147 | TRAIN.RPN_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 148 | TRAIN.RPN_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 149 | 150 | # IOU >= thresh: positive example 151 | TRAIN.RPN_POSITIVE_OVERLAP = 0.7 152 | TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 153 | TRAIN.RPN_CLOBBER_POSITIVES = False 154 | TRAIN.RPN_FG_FRACTION = 0.5 155 | TRAIN.RPN_BATCHSIZE = 256 156 | TRAIN.RPN_NMS_THRESH = 0.7 157 | # __C.TRAIN.RPN_MIN_SIZE = 16 158 | TRAIN.USE_ALL_GT = True 159 | TRAIN.RPN_PRE_NMS_TOP_N = 12000 160 | TRAIN.RPN_POST_NMS_TOP_N = 2000 161 | 162 | TEST = edict() 163 | TEST.BBOX_REG = True 164 | TEST.HAS_RPN = True 165 | TEST.RPN_NMS_THRESH = 0.7 166 | TEST.RPN_PRE_NMS_TOP_N = 6000 167 | TEST.RPN_POST_NMS_TOP_N = 1000 168 | 169 | 170 | config = Config() 171 | cfg = config 172 | 173 | 174 | def link_log_dir(): 175 | if not os.path.exists(osp.join(config.this_model_dir, 'log')): 176 | cmd = "ln -s " + config.output_dir + " log" 177 | os.system(cmd) 178 | 179 | 180 | def link_tools_dir(): 181 | if not os.path.exists(osp.join(config.this_model_dir, 'tools')): 182 | cmd = "ln -s " + os.path.join(root_dir, 'tools') + " tools" 183 | os.system(cmd) 184 | 185 | def make_parser(): 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument( 188 | '-log', '--linklog', default=False, action='store_true') 189 | parser.add_argument( 190 | '-tool', '--link_tools', default=False, action='store_true') 191 | 192 | return parser 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = make_parser() 197 | args = parser.parse_args() 198 | if args.linklog: 199 | link_log_dir() 200 | if args.link_tools: 201 | link_tools_dir() 202 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/datasets_odgt/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/datasets_odgt/coco.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | class COCOBasic: 8 | class_names = [ 9 | 'background', 'person', 'bicycle', 'car', 'motorcycle', 10 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 11 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 12 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 13 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 14 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 15 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 16 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 17 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 18 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 19 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 20 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 21 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 22 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 23 | 'book', 'clock', 'vase', 'scissors', 'teddy 
bear', 24 | 'hair drier', 'toothbrush'] 25 | classes_originID = { 26 | 'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 27 | 'airplane': 5, 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 28 | 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 29 | 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 30 | 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 31 | 'bear': 23, 'zebra': 24, 'giraffe': 25, 'backpack': 27, 32 | 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 33 | 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 34 | 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 35 | 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 36 | 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 37 | 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 'apple': 53, 38 | 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 39 | 'hot dog': 58, 'pizza': 59, 'donut': 60, 'cake': 61, 40 | 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 41 | 'dining table': 67, 'toilet': 70, 'tv': 72, 'laptop': 73, 42 | 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 43 | 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 44 | 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 45 | 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 46 | 'toothbrush': 90} 47 | num_classes = 81 48 | 49 | 50 | class COCO(COCOBasic): 51 | pass 52 | # train_root_folder = '' 53 | # train_source = os.path.join( 54 | # config.root_dir, 'data', 'MSCOCO/odformat/coco_trainvalmini.odgt') 55 | # eval_root_folder = '' 56 | # eval_source = os.path.join( 57 | # config.root_dir, 'data', 'MSCOCO/odformat/coco_minival2014.odgt') 58 | # eval_json = os.path.join( 59 | # config.root_dir, 'data', 'MSCOCO/instances_minival2014.json') 60 | 61 | 62 | if __name__ == "__main__": 63 | # coco = COCOIns() 64 | from IPython import embed 65 | embed() 66 | -------------------------------------------------------------------------------- /lib/datasets_odgt/cocoval.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | from config import config 8 | import argparse 9 | import os, sys 10 | from pycocotools.coco import COCO 11 | from pycocotools.cocoeval import COCOeval 12 | from IPython import embed 13 | 14 | def cocoval(detected_json): 15 | eval_json = config.eval_json 16 | eval_gt = COCO(eval_json) 17 | 18 | eval_dt = eval_gt.loadRes(detected_json) 19 | cocoEval = COCOeval(eval_gt, eval_dt, iouType='bbox') 20 | 21 | # cocoEval.params.imgIds = eval_gt.getImgIds() 22 | cocoEval.evaluate() 23 | cocoEval.accumulate() 24 | cocoEval.summarize() 25 | 26 | if __name__ == "__main__": 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('json', type=str, help='json to eval') 29 | 30 | args = parser.parse_args() 31 | cocoval(args.json) 32 | # from config import config 33 | 34 | # eval_json = config.eval_json 35 | -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/PythonAPI/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python3 setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python3 setup.py build_ext install --user 9 | rm -rf build 10 | -------------------------------------------------------------------------------- 
/lib/datasets_odgt/lib_coco/PythonAPI/pycocoEvalDemo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "from pycocotools.coco import COCO\n", 14 | "from pycocotools.cocoeval import COCOeval\n", 15 | "import numpy as np\n", 16 | "import skimage.io as io\n", 17 | "import pylab\n", 18 | "pylab.rcParams['figure.figsize'] = (10.0, 8.0)" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": { 25 | "collapsed": false 26 | }, 27 | "outputs": [ 28 | { 29 | "name": "stdout", 30 | "output_type": "stream", 31 | "text": [ 32 | "Running demo for *bbox* results.\n" 33 | ] 34 | } 35 | ], 36 | "source": [ 37 | "annType = ['segm','bbox','keypoints']\n", 38 | "annType = annType[1] #specify type here\n", 39 | "prefix = 'person_keypoints' if annType=='keypoints' else 'instances'\n", 40 | "print 'Running demo for *%s* results.'%(annType)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 3, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [ 50 | { 51 | "name": "stdout", 52 | "output_type": "stream", 53 | "text": [ 54 | "loading annotations into memory...\n", 55 | "Done (t=8.01s)\n", 56 | "creating index...\n", 57 | "index created!\n" 58 | ] 59 | } 60 | ], 61 | "source": [ 62 | "#initialize COCO ground truth api\n", 63 | "dataDir='../'\n", 64 | "dataType='val2014'\n", 65 | "annFile = '%s/annotations/%s_%s.json'%(dataDir,prefix,dataType)\n", 66 | "cocoGt=COCO(annFile)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stdout", 78 | "output_type": "stream", 79 | "text": [ 80 | "Loading and preparing results... \n", 81 | "DONE (t=0.05s)\n", 82 | "creating index...\n", 83 | "index created!\n" 84 | ] 85 | } 86 | ], 87 | "source": [ 88 | "#initialize COCO detections api\n", 89 | "resFile='%s/results/%s_%s_fake%s100_results.json'\n", 90 | "resFile = resFile%(dataDir, prefix, dataType, annType)\n", 91 | "cocoDt=cocoGt.loadRes(resFile)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 5, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "imgIds=sorted(cocoGt.getImgIds())\n", 103 | "imgIds=imgIds[0:100]\n", 104 | "imgId = imgIds[np.random.randint(100)]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 6, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "Running per image evaluation... \n", 119 | "DONE (t=0.46s).\n", 120 | "Accumulating evaluation results... 
\n", 121 | "DONE (t=0.38s).\n", 122 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.505\n", 123 | " Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.697\n", 124 | " Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.573\n", 125 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.586\n", 126 | " Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.519\n", 127 | " Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.501\n", 128 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.387\n", 129 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.594\n", 130 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.595\n", 131 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.640\n", 132 | " Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.566\n", 133 | " Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.564\n" 134 | ] 135 | } 136 | ], 137 | "source": [ 138 | "# running evaluation\n", 139 | "cocoEval = COCOeval(cocoGt,cocoDt,annType)\n", 140 | "cocoEval.params.imgIds = imgIds\n", 141 | "cocoEval.evaluate()\n", 142 | "cocoEval.accumulate()\n", 143 | "cocoEval.summarize()" 144 | ] 145 | } 146 | ], 147 | "metadata": { 148 | "kernelspec": { 149 | "display_name": "Python 2", 150 | "language": "python", 151 | "name": "python2" 152 | }, 153 | "language_info": { 154 | "codemirror_mode": { 155 | "name": "ipython", 156 | "version": 2 157 | }, 158 | "file_extension": ".py", 159 | "mimetype": "text/x-python", 160 | "name": "python", 161 | "nbconvert_exporter": "python", 162 | "pygments_lexer": "ipython2", 163 | "version": "2.7.10" 164 | } 165 | }, 166 | "nbformat": 4, 167 | "nbformat_minor": 0 168 | } 169 | -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/PythonAPI/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/PythonAPI/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. 
Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/PythonAPI/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | 'pycocotools._mask', 12 | sources=['../common/maskApi.c', 'pycocotools/_mask.pyx'], 13 | include_dirs = [np.get_include(), '../common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /lib/datasets_odgt/lib_coco/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. 
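   (presumably rlesInit allocates *R as an array of n zero-initialized RLEs, and rlesFree releases each element's counts before freeing the array itself)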
*/ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /lib/detection_opr/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | if __name__ == '__main__': 8 | pass -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Sergey Karayev 7 | # -------------------------------------------------------- 8 | 9 | cimport cython 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | DTYPE = np.float 14 | ctypedef np.float_t DTYPE_t 15 | 16 | def bbox_overlaps_float( 17 | np.ndarray[DTYPE_t, ndim=2] boxes, 18 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 19 | """ 20 | Parameters 21 | ---------- 22 | boxes: (N, 4) ndarray of float 23 | query_boxes: (K, 4) ndarray of float 24 | Returns 25 | ------- 26 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 27 | """ 28 | cdef unsigned int N = boxes.shape[0] 29 | cdef unsigned int K = query_boxes.shape[0] 30 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 31 | cdef DTYPE_t iw, ih, box_area 32 | cdef DTYPE_t ua 33 | cdef unsigned int k, n 34 | for k in range(K): 35 | box_area = ( 36 | 
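# area of query box k; unlike bbox_overlaps below, this float variant omits the +1 pixel convention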
(query_boxes[k, 2] - query_boxes[k, 0]) * 37 | (query_boxes[k, 3] - query_boxes[k, 1]) 38 | ) 39 | for n in range(N): 40 | iw = ( 41 | min(boxes[n, 2], query_boxes[k, 2]) - 42 | max(boxes[n, 0], query_boxes[k, 0]) 43 | ) 44 | if iw > 0: 45 | ih = ( 46 | min(boxes[n, 3], query_boxes[k, 3]) - 47 | max(boxes[n, 1], query_boxes[k, 1]) 48 | ) 49 | if ih > 0: 50 | ua = float( 51 | (boxes[n, 2] - boxes[n, 0]) * 52 | (boxes[n, 3] - boxes[n, 1]) + 53 | box_area - iw * ih 54 | ) 55 | # if query_boxes[k, 4] == -1: 56 | # ua = float((boxes[n, 2] - boxes[n, 0]) 57 | # *(boxes[n, 3] - boxes[n, 1])) 58 | # else: 59 | # ua = float( 60 | # (boxes[n, 2] - boxes[n, 0]) * 61 | # (boxes[n, 3] - boxes[n, 1]) + 62 | # box_area - iw * ih 63 | # ) 64 | overlaps[n, k] = iw * ih / ua 65 | return overlaps 66 | 67 | def bbox_overlaps( 68 | np.ndarray[DTYPE_t, ndim=2] boxes, 69 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 70 | """ 71 | Parameters 72 | ---------- 73 | boxes: (N, 4) ndarray of float 74 | query_boxes: (K, 4) ndarray of float 75 | Returns 76 | ------- 77 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 78 | """ 79 | cdef unsigned int N = boxes.shape[0] 80 | cdef unsigned int K = query_boxes.shape[0] 81 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 82 | cdef DTYPE_t iw, ih, box_area 83 | cdef DTYPE_t ua 84 | cdef unsigned int k, n 85 | for k in range(K): 86 | box_area = ( 87 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 88 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 89 | ) 90 | for n in range(N): 91 | iw = ( 92 | min(boxes[n, 2], query_boxes[k, 2]) - 93 | max(boxes[n, 0], query_boxes[k, 0]) + 1 94 | ) 95 | if iw > 0: 96 | ih = ( 97 | min(boxes[n, 3], query_boxes[k, 3]) - 98 | max(boxes[n, 1], query_boxes[k, 1]) + 1 99 | ) 100 | if ih > 0: 101 | ua = float( 102 | (boxes[n, 2] - boxes[n, 0] + 1) * 103 | (boxes[n, 3] - boxes[n, 1] + 1) + 104 | box_area - iw * ih 105 | ) 106 | overlaps[n, k] = iw * ih / ua 107 | return overlaps 108 | 109 | def bbox_overlaps_self( 110 | np.ndarray[DTYPE_t, ndim=2] boxes, 111 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 112 | """ 113 | Parameters 114 | ---------- 115 | boxes: (N, 4) ndarray of float 116 | query_boxes: (K, 4) ndarray of float 117 | Returns 118 | ------- 119 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 120 | """ 121 | cdef unsigned int N = boxes.shape[0] 122 | cdef unsigned int K = query_boxes.shape[0] 123 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 124 | cdef DTYPE_t iw, ih, box_area 125 | cdef DTYPE_t ua 126 | cdef unsigned int k, n 127 | for k in range(K): 128 | box_area = ( 129 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 130 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 131 | ) 132 | for n in range(N): 133 | iw = ( 134 | min(boxes[n, 2], query_boxes[k, 2]) - 135 | max(boxes[n, 0], query_boxes[k, 0]) + 1 136 | ) 137 | if iw > 0: 138 | ih = ( 139 | min(boxes[n, 3], query_boxes[k, 3]) - 140 | max(boxes[n, 1], query_boxes[k, 1]) + 1 141 | ) 142 | if ih > 0: 143 | ua = float(box_area) 144 | overlaps[n, k] = iw * ih / ua 145 | return overlaps 146 | 147 | def bbox_overlaps_ign( 148 | np.ndarray[DTYPE_t, ndim=2] boxes, 149 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 150 | """ 151 | Parameters 152 | ---------- 153 | boxes: (N, 4) ndarray of float 154 | query_boxes: (K, 4) ndarray of float 155 | Returns 156 | ------- 157 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes w.r.t. 
box area 158 | """ 159 | cdef unsigned int N = boxes.shape[0] 160 | cdef unsigned int K = query_boxes.shape[0] 161 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 162 | cdef DTYPE_t iw, ih, box_area 163 | cdef DTYPE_t ua 164 | cdef unsigned int k, n 165 | for k in range(K): 166 | for n in range(N): 167 | box_area = ( 168 | (boxes[n, 2] - boxes[n, 0] + 1) * 169 | (boxes[n, 3] - boxes[n, 1] + 1) 170 | ) 171 | iw = ( 172 | min(boxes[n, 2], query_boxes[k, 2]) - 173 | max(boxes[n, 0], query_boxes[k, 0]) + 1 174 | ) 175 | if iw > 0: 176 | ih = ( 177 | min(boxes[n, 3], query_boxes[k, 3]) - 178 | max(boxes[n, 1], query_boxes[k, 1]) + 1 179 | ) 180 | if ih > 0: 181 | ua = float(box_area) 182 | overlaps[n, k] = iw * ih / ua 183 | return overlaps 184 | 185 | -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/bbox_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | import numpy as np 8 | from IPython import embed 9 | 10 | import megbrain as mgb 11 | import megskull as mgsk 12 | from megskull.opr.all import Concat, CondTake, ZerosLike, Log, Exp, Max, Min 13 | from megskull.cblk.elemwise import safelog 14 | 15 | 16 | def _concat_new_axis(t1, t2, t3, t4, axis=1): 17 | return Concat( 18 | [t1.add_axis(-1), t2.add_axis(-1), t3.add_axis(-1), t4.add_axis(-1)], 19 | axis=axis) 20 | 21 | 22 | def _box_ltrb_to_cs_opr(bbox, addaxis=None): 23 | """ transform the left-top right-bottom encoding bounding boxes 24 | to center and size encodings""" 25 | bbox_width = bbox[:, 2] - bbox[:, 0] + 1 26 | bbox_height = bbox[:, 3] - bbox[:, 1] + 1 27 | bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width 28 | bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height 29 | if addaxis is None: 30 | return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y 31 | else: 32 | return bbox_width.add_axis(addaxis), bbox_height.add_axis( 33 | addaxis), bbox_ctr_x.add_axis(addaxis), bbox_ctr_y.add_axis(addaxis) 34 | 35 | 36 | def clip_boxes_opr(boxes, im_info): 37 | """ Clip the boxes into the image region.""" 38 | # x1 >=0 39 | box_x1 = Max(Min(boxes[:, 0::4], im_info[1] - 1), 0) 40 | # y1 >=0 41 | box_y1 = Max(Min(boxes[:, 1::4], im_info[0] - 1), 0) 42 | # x2 < im_info[1] 43 | box_x2 = Max(Min(boxes[:, 2::4], im_info[1] - 1), 0) 44 | # y2 < im_info[0] 45 | box_y2 = Max(Min(boxes[:, 3::4], im_info[0] - 1), 0) 46 | 47 | # clip_box = Concat([box_x1, box_y1, box_x2, box_y2], axis=1) 48 | clip_box = _concat_new_axis(box_x1, box_y1, box_x2, box_y2, 2)\ 49 | .reshape(boxes.shape[0], -1) 50 | 51 | return clip_box 52 | 53 | def filter_boxes_opr(boxes, min_size): 54 | """Remove all boxes with any side smaller than min_size.""" 55 | ws = boxes[:, 2] - boxes[:, 0] + 1 56 | hs = boxes[:, 3] - boxes[:, 1] + 1 57 | # keep = np.where((ws >= min_size) & (hs >= min_size))[0] 58 | keep = (ws >= min_size) * (hs >= min_size) 59 | # NOTE: In FPN, I have met np.all(keep) = 0(don't know why), 60 | # thus I add the following line to avoid crash 61 | # keep = keep + (keep.sum().eq(0)) 62 | 63 | keep_index = CondTake(keep, keep, 'EQ', 1).outputs[1] 64 | return keep_index 65 | 66 | 67 | def filter_anchors_opr( 68 | all_anchors, im_height, im_width, allowed_border_height, 69 | allowed_border_width=None): 70 | if allowed_border_width is None: 71 | allowed_border_width = allowed_border_height 72 | inds_inside = (all_anchors[:, 0] >= -allowed_border_width) * \ 73 | 
(all_anchors[:, 1] >= -allowed_border_height) * \ 74 | (all_anchors[:, 2] < im_width + allowed_border_width) * \ 75 | (all_anchors[:, 3] < im_height + allowed_border_height) 76 | 77 | inds_inside = CondTake(inds_inside, inds_inside, 'EQ', 1).outputs[1] 78 | return inds_inside 79 | 80 | 81 | def bbox_transform_opr(bbox, gt): 82 | """ Transform the bounding box and ground truth to the loss targets. 83 | The 4 box coordinates are in axis 1""" 84 | 85 | bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y = _box_ltrb_to_cs_opr(bbox) 86 | gt_width, gt_height, gt_ctr_x, gt_ctr_y = _box_ltrb_to_cs_opr(gt) 87 | 88 | target_dx = (gt_ctr_x - bbox_ctr_x) / bbox_width 89 | target_dy = (gt_ctr_y - bbox_ctr_y) / bbox_height 90 | target_dw = Log(gt_width / bbox_width) 91 | target_dh = Log(gt_height / bbox_height) 92 | # target = Concat([target_dx, target_dy, target_dw, target_dh], axis=1) 93 | target = _concat_new_axis(target_dx, target_dy, target_dw, target_dh) 94 | return target 95 | 96 | 97 | def bbox_transform_inv_opr(anchors, deltas): 98 | """ Transforms the learned deltas to the final bbox coordinates, the axis is 1""" 99 | anchor_width, anchor_height, anchor_ctr_x, anchor_ctr_y = \ 100 | _box_ltrb_to_cs_opr(anchors, 1) 101 | pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width 102 | pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height 103 | pred_width = anchor_width * Exp(deltas[:, 2::4]) 104 | pred_height = anchor_height * Exp(deltas[:, 3::4]) 105 | 106 | pred_x1 = pred_ctr_x - 0.5 * pred_width 107 | pred_y1 = pred_ctr_y - 0.5 * pred_height 108 | pred_x2 = pred_ctr_x + 0.5 * pred_width 109 | pred_y2 = pred_ctr_y + 0.5 * pred_height 110 | 111 | pred_box = _concat_new_axis(pred_x1, pred_y1, pred_x2, pred_y2, 2) 112 | pred_box = pred_box.reshape(pred_box.shape[0], -1) 113 | 114 | return pred_box 115 | 116 | 117 | def box_overlap_opr(box, gt): 118 | """ 119 | Compute the overlaps between box and gt(_box) 120 | box: (N, 4) Tensor 121 | gt : (K, 4) Tensor 122 | return: (N, K) Tensor, stores Max(0, intersection/union) 123 | """ 124 | N = box.shape[0] 125 | K = gt.shape[0] 126 | target_shape = (N, K, 4) 127 | b_box = box.add_axis(1).broadcast(target_shape) 128 | b_gt = gt.add_axis(0).broadcast(target_shape) 129 | 130 | iw = ( 131 | Min(b_box[:, :, 2], b_gt[:, :, 2]) - \ 132 | Max(b_box[:, :, 0], b_gt[:, :, 0]) + 1) 133 | ih = ( 134 | Min(b_box[:, :, 3], b_gt[:, :, 3]) - \ 135 | Max(b_box[:, :, 1], b_gt[:, :, 1]) + 1) 136 | inter = Max(iw, 0) * Max(ih, 0) 137 | 138 | # Use the broadcast to save some time 139 | area_box = (box[:, 2] - box[:, 0] + 1) * (box[:, 3] - box[:, 1] + 1) 140 | area_gt = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1) 141 | area_target_shape = (N, K) 142 | b_area_box = area_box.add_axis(1).broadcast(area_target_shape) 143 | b_area_gt = area_gt.add_axis(0).broadcast(area_target_shape) 144 | 145 | union = b_area_box + b_area_gt - inter 146 | 147 | overlaps = Max(inter / union, 0) 148 | return overlaps 149 | 150 | 151 | def box_overlap_ignore_opr(box, gt, *, ignore_label=-1, return_separate=False): 152 | """ 153 | Compute the overlaps between box and gt(_box) 154 | box: (N, 4) Tensor 155 | gt : (K, 5) Tensor, the last col shows the labels of gt 156 | return: (N, K) Tensor, stores Max(0, intersection/union) 157 | 158 | Here, we consider the ignore_label of gt boxes. When compute 159 | box vs ignored_gt, the overlap is replaced by inter / box_area. 
160 | This operation will force the boxes near to ignore gt_boxes to 161 | be matched to ignored boxes rather than fg or bg labels. 162 | """ 163 | N = box.shape[0] 164 | K = gt.shape[0] 165 | target_shape = (N, K, 4) 166 | b_box = box.add_axis(1).broadcast(target_shape) 167 | b_gt = gt[:, :4].add_axis(0).broadcast(target_shape) 168 | 169 | # intersection of boxes 170 | iw = (Min(b_box[:, :, 2], b_gt[:, :, 2]) - \ 171 | Max(b_box[:, :, 0], b_gt[:, :, 0]) + 1) 172 | ih = (Min(b_box[:, :, 3], b_gt[:, :, 3]) - \ 173 | Max(b_box[:, :, 1], b_gt[:, :, 1]) + 1) 174 | inter = Max(iw, 0) * Max(ih, 0) 175 | 176 | # Use the broadcast to save some time 177 | area_box = (box[:, 2] - box[:, 0] + 1) * (box[:, 3] - box[:, 1] + 1) 178 | area_gt = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1) 179 | area_target_shape = (N, K) 180 | b_area_box = area_box.add_axis(1).broadcast(area_target_shape) 181 | b_area_gt = area_gt.add_axis(0).broadcast(area_target_shape) 182 | 183 | union = b_area_box + b_area_gt - inter 184 | 185 | overlaps_normal = Max(inter / union, 0) 186 | overlaps_ignore = Max(inter / b_area_box, 0) 187 | 188 | gt_ignore_mask = gt[:, 4].eq(ignore_label).add_axis(0).broadcast( 189 | area_target_shape) 190 | 191 | overlaps_normal *= (1 - gt_ignore_mask) 192 | overlaps_ignore *= gt_ignore_mask 193 | 194 | if return_separate: 195 | return overlaps_normal, overlaps_ignore 196 | else: 197 | overlaps = overlaps_normal + overlaps_ignore 198 | return overlaps 199 | -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | def bbox_transform(ex_rois, gt_rois): 14 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 15 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 16 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 17 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 18 | 19 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 20 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 21 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 22 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 23 | 24 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 25 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 26 | targets_dw = np.log(gt_widths / ex_widths) 27 | targets_dh = np.log(gt_heights / ex_heights) 28 | 29 | targets = np.vstack( 30 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 31 | return targets 32 | 33 | 34 | def bbox_transform_inv(boxes, deltas): 35 | if boxes.shape[0] == 0: 36 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 37 | 38 | boxes = boxes.astype(deltas.dtype, copy=False) 39 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 40 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 41 | ctr_x = boxes[:, 0] + 0.5 * widths 42 | ctr_y = boxes[:, 1] + 0.5 * heights 43 | 44 | dx = deltas[:, 0::4] 45 | dy = deltas[:, 1::4] 46 | dw = deltas[:, 2::4] 47 | dh = deltas[:, 3::4] 48 | 49 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 50 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 51 | pred_w = np.exp(dw) * 
widths[:, np.newaxis] 52 | pred_h = np.exp(dh) * heights[:, np.newaxis] 53 | 54 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 55 | # x1 56 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 57 | # y1 58 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 59 | # x2 60 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 61 | # y2 62 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 63 | 64 | return pred_boxes 65 | 66 | 67 | def clip_boxes(boxes, im_shape): 68 | """ 69 | Clip boxes to image boundaries. 70 | """ 71 | # x1 >= 0 72 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 73 | # y1 >= 0 74 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 75 | # x2 < im_shape[1] 76 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 77 | # y2 < im_shape[0] 78 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 79 | return boxes 80 | -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/bbox_transform_opr.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import tensorflow as tf 13 | 14 | # todo: change bbox_transform to an operator 15 | def bbox_transform(ex_rois, gt_rois): 16 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 17 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 18 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 19 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 20 | 21 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 22 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 23 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 24 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 25 | 26 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 27 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 28 | targets_dw = np.log(gt_widths / ex_widths) 29 | targets_dh = np.log(gt_heights / ex_heights) 30 | 31 | targets = np.vstack( 32 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 33 | return targets 34 | 35 | 36 | def _concat_new_axis(t1, t2, t3, t4, axis): 37 | return tf.concat( 38 | [tf.expand_dims(t1, -1), tf.expand_dims(t2, -1), 39 | tf.expand_dims(t3, -1), tf.expand_dims(t4, -1)], axis=axis) 40 | 41 | 42 | def bbox_transform_inv(boxes, deltas): 43 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 44 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 45 | ctr_x = tf.expand_dims(boxes[:, 0] + 0.5 * widths, -1) 46 | ctr_y = tf.expand_dims(boxes[:, 1] + 0.5 * heights, -1) 47 | 48 | dx = deltas[:, 0::4] 49 | dy = deltas[:, 1::4] 50 | dw = deltas[:, 2::4] 51 | dh = deltas[:, 3::4] 52 | 53 | widths = tf.expand_dims(widths, -1) 54 | heights = tf.expand_dims(heights, -1) 55 | 56 | pred_ctr_x = dx * widths + ctr_x 57 | pred_ctr_y = dy * heights + ctr_y 58 | pred_w = tf.exp(dw) * widths 59 | pred_h = tf.exp(dh) * heights 60 | 61 | # x1 62 | # pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 63 | pred_x1 = pred_ctr_x - 0.5 * pred_w 64 | # y1 65 | # pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 66 | pred_y1 = pred_ctr_y - 0.5 * pred_h 67 | # x2 68 | # pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 69 |
pred_x2 = pred_ctr_x + 0.5 * pred_w 70 | # y2 71 | # pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 72 | pred_y2 = pred_ctr_y + 0.5 * pred_h 73 | 74 | pred_boxes = _concat_new_axis(pred_x1, pred_y1, pred_x2, pred_y2, 2) 75 | pred_boxes = tf.reshape(pred_boxes, (tf.shape(pred_boxes)[0], -1)) 76 | return pred_boxes 77 | 78 | 79 | def clip_boxes(boxes, im_shape): 80 | """ 81 | Clip boxes to image boundaries. 82 | """ 83 | # x1 >= 0 84 | x1 = tf.maximum(tf.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 85 | # y1 >= 0 86 | y1 = tf.maximum(tf.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 87 | # x2 < im_shape[1] 88 | x2 = tf.maximum(tf.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 89 | # y2 < im_shape[0] 90 | y2 = tf.maximum(tf.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 91 | 92 | pred_boxes = _concat_new_axis(x1, y1, x2, y2, 2) 93 | pred_boxes = tf.reshape(pred_boxes, (tf.shape(pred_boxes)[0], -1)) 94 | return pred_boxes 95 | -------------------------------------------------------------------------------- /lib/detection_opr/box_utils/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding=utf-8 3 | from distutils.core import setup, Extension 4 | from Cython.Build import cythonize 5 | import numpy as np 6 | 7 | package = Extension('bbox', ['bbox.pyx']) 8 | setup(ext_modules=cythonize([package]), include_dirs=[np.get_include()]) 9 | -------------------------------------------------------------------------------- /lib/detection_opr/rfcn_plus_plus/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/detection_opr/rfcn_plus_plus/rfcn_plus_plus_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2013 Thomas Park 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 
24 | 25 | @author: zeming li, yilun chen 26 | @contact: zengarden2009@gmail.com, gentlesky0@gmail.com 27 | """ 28 | 29 | import tensorflow as tf 30 | import tensorflow.contrib.slim as slim 31 | import numpy as np 32 | #from lib_kernel.lib_roifm_maxk_mask import roifm_maxk_mask_op 33 | from IPython import embed 34 | def global_context_module(bottom, prefix='', ks=15, chl_mid=256, chl_out=1024): 35 | initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) 36 | 37 | col_max = slim.conv2d(bottom, chl_mid, [ks, 1], 38 | trainable=True, activation_fn=None, 39 | weights_initializer=initializer, scope=prefix + '_conv%d_w_pre' % ks) 40 | col = slim.conv2d(col_max, chl_out, [1, ks], 41 | trainable=True, activation_fn=None, 42 | weights_initializer=initializer, scope=prefix + '_conv%d_w' % ks) 43 | 44 | row_max = slim.conv2d(bottom, chl_mid, [1, ks], 45 | trainable=True, activation_fn=None, 46 | weights_initializer=initializer, scope=prefix + '_conv%d_h_pre' % ks) 47 | 48 | row = slim.conv2d(row_max, chl_out, [ks, 1], 49 | trainable=True, activation_fn=None, 50 | weights_initializer=initializer, scope=prefix + '_conv%d_h' % ks) 51 | 52 | s = row + col 53 | return s 54 | 55 | def row_column_max_pooling(bottom, prefix='', window=(7, 7)): 56 | column_mx = slim.max_pool2d(bottom, [window[0], 1], 57 | stride=[window[0], 1], scope=prefix + '_column_max') 58 | row_mx = slim.max_pool2d(bottom, [1, window[1]], 59 | stride=[1, window[1]], scope=prefix + '_row_max') 60 | 61 | column_mean = slim.avg_pool2d(column_mx, [1, window[1]], 62 | stride=[1, window[1]], scope=prefix + '_column_mean') 63 | row_mean = slim.avg_pool2d(row_mx, [window[0], 1], 64 | stride=[window[0], 1], scope=prefix + '_row_mean') 65 | 66 | return row_mean + column_mean 67 | 68 | 69 | #def roifm_maxk_mask(bottom, k=[1,2,3,4,3,2,1]): 70 | # mask1 = roifm_maxk_mask_op.roifm_maxk_mask( 71 | # tf.transpose(bottom, [0, 3, 1, 2]), 72 | # k[0],k[1],k[2],k[3],k[4],k[5],k[6]) 73 | # mask1 = tf.transpose(mask1, [0, 2, 3, 1]) 74 | # 75 | # mask2 = roifm_maxk_mask_op.roifm_maxk_mask( 76 | # tf.transpose(bottom, [0, 3, 2, 1]), 77 | # k[0],k[1],k[2],k[3],k[4],k[5],k[6]) 78 | # mask2 = tf.transpose(mask2, [0, 3, 2, 1]) 79 | # 80 | # return tf.stop_gradient(mask1 + mask2) 81 | # 82 | # 83 | #def roifm_maxk_mask_layer(bottom, maxk=[1,2,3,4,3,2,1]): 84 | # batch, height, width, chl = bottom.shape 85 | # mask = np.zeros(bottom.shape,dtype=np.float32) 86 | # for b in range(batch): 87 | # for c in range(chl): 88 | # for h in range(height): 89 | # idx = np.argpartition(bottom[b,h,:,c], -maxk[h])[-maxk[h]:] 90 | # mask[b, h, idx, c] = 1.0 91 | # for w in range(width): 92 | # idx = np.argpartition(bottom[b,:,w,c], -maxk[w])[-maxk[w]:] 93 | # mask[b, idx, w, c] = 1.0 94 | # return mask 95 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import 
absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.cython_bbox import bbox_overlaps 16 | from detection_opr.utils.bbox_transform import bbox_transform 17 | 18 | 19 | def anchor_target_layer( 20 | gt_boxes, im_info, _feat_stride, all_anchors, num_anchors, 21 | is_restrict_bg=False): 22 | """Same as the anchor target layer in original Fast/er RCNN """ 23 | # A = num_anchors 24 | # K = total_anchors / num_anchors 25 | 26 | total_anchors = all_anchors.shape[0] 27 | im_info = im_info[0] 28 | 29 | # allow boxes to sit over the edge by a small amount 30 | _allowed_border = 0 31 | 32 | # only keep anchors inside the image 33 | inds_inside = np.where( 34 | (all_anchors[:, 0] >= -_allowed_border) & 35 | (all_anchors[:, 1] >= -_allowed_border) & 36 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 37 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 38 | )[0] 39 | 40 | anchors = all_anchors[inds_inside, :] 41 | 42 | # label: 1 is positive, 0 is negative, -1 is dont care 43 | labels = np.empty((len(inds_inside),), dtype=np.float32) 44 | labels.fill(-1) 45 | 46 | # overlaps between the anchors and the gt boxes 47 | overlaps = bbox_overlaps( 48 | np.ascontiguousarray(anchors, dtype=np.float), 49 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 50 | argmax_overlaps = overlaps.argmax(axis=1) 51 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 52 | gt_argmax_overlaps = overlaps.argmax(axis=0) 53 | gt_max_overlaps = overlaps[ 54 | gt_argmax_overlaps, np.arange(overlaps.shape[1])] 55 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 56 | 57 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 58 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 59 | 60 | labels[gt_argmax_overlaps] = 1 61 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 62 | 63 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 64 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 65 | 66 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 67 | fg_inds = np.where(labels == 1)[0] 68 | if len(fg_inds) > num_fg: 69 | disable_inds = npr.choice( 70 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 71 | labels[disable_inds] = -1 72 | 73 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 74 | if is_restrict_bg: 75 | num_bg = max(num_bg, num_fg * 1.5) 76 | bg_inds = np.where(labels == 0)[0] 77 | if len(bg_inds) > num_bg: 78 | disable_inds = npr.choice( 79 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 80 | labels[disable_inds] = -1 81 | 82 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 83 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 84 | 85 | # map up to original set of anchors 86 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 87 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 88 | 89 | # labels 90 | # labels = labels.reshape((1, height, width, A)) 91 | rpn_labels = labels.reshape((-1, 1)) 92 | 93 | # bbox_targets 94 | bbox_targets = bbox_targets.reshape((-1, 4)) 95 | rpn_bbox_targets = bbox_targets 96 | 97 | return rpn_labels, rpn_bbox_targets 98 | 99 | 100 | def _unmap(data, count, inds, fill=0): 101 | """ Unmap a subset of item (data) back to the original set of items (of 102 | size count) """ 103 | if len(data.shape) == 1: 104 | ret = np.empty((count,), dtype=np.float32) 105 | ret.fill(fill) 
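        # entries not covered by `inds` keep the `fill` value (e.g. -1 for labels, 0 for box targets)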
106 | ret[inds] = data 107 | else: 108 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 109 | ret.fill(fill) 110 | ret[inds, :] = data 111 | return ret 112 | 113 | 114 | def _compute_targets(ex_rois, gt_rois): 115 | """Compute bounding-box regression targets for an image.""" 116 | targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype( 117 | np.float32, copy=False) 118 | if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() and cfg.TRAIN.RPN_NORMALIZE_TARGETS: 119 | assert cfg.TRAIN.RPN_NORMALIZE_MEANS is not None 120 | assert cfg.TRAIN.RPN_NORMALIZE_STDS is not None 121 | targets -= cfg.TRAIN.RPN_NORMALIZE_MEANS 122 | targets /= cfg.TRAIN.RPN_NORMALIZE_STDS 123 | return targets 124 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | 14 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 15 | # 16 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 17 | # >> anchors 18 | # 19 | # anchors = 20 | # 21 | # -83 -39 100 56 22 | # -175 -87 192 104 23 | # -359 -183 376 200 24 | # -55 -55 72 72 25 | # -119 -119 136 136 26 | # -247 -247 264 264 27 | # -35 -79 52 96 28 | # -79 -167 96 184 29 | # -167 -343 184 360 30 | 31 | # array([[ -83., -39., 100., 56.], 32 | # [-175., -87., 192., 104.], 33 | # [-359., -183., 376., 200.], 34 | # [ -55., -55., 72., 72.], 35 | # [-119., -119., 136., 136.], 36 | # [-247., -247., 264., 264.], 37 | # [ -35., -79., 52., 96.], 38 | # [ -79., -167., 96., 184.], 39 | # [-167., -343., 184., 360.]]) 40 | 41 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 42 | scales=2 ** np.arange(3, 6)): 43 | """ 44 | Generate anchor (reference) windows by enumerating aspect ratios X 45 | scales wrt a reference (0, 0, 15, 15) window. 46 | """ 47 | 48 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 49 | ratio_anchors = _ratio_enum(base_anchor, ratios) 50 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 51 | for i in range(ratio_anchors.shape[0])]) 52 | return anchors 53 | 54 | 55 | def _whctrs(anchor): 56 | """ 57 | Return width, height, x center, and y center for an anchor (window). 58 | """ 59 | 60 | w = anchor[2] - anchor[0] + 1 61 | h = anchor[3] - anchor[1] + 1 62 | x_ctr = anchor[0] + 0.5 * (w - 1) 63 | y_ctr = anchor[1] + 0.5 * (h - 1) 64 | return w, h, x_ctr, y_ctr 65 | 66 | 67 | def _mkanchors(ws, hs, x_ctr, y_ctr): 68 | """ 69 | Given a vector of widths (ws) and heights (hs) around a center 70 | (x_ctr, y_ctr), output a set of anchors (windows). 71 | """ 72 | 73 | ws = ws[:, np.newaxis] 74 | hs = hs[:, np.newaxis] 75 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 76 | y_ctr - 0.5 * (hs - 1), 77 | x_ctr + 0.5 * (ws - 1), 78 | y_ctr + 0.5 * (hs - 1))) 79 | return anchors 80 | 81 | 82 | def _ratio_enum(anchor, ratios): 83 | """ 84 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
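    For example, the base 16x16 anchor (0, 0, 15, 15) with ratios (0.5, 1, 2)
    has size 256, so size_ratios = (512, 256, 128), ws = (23, 16, 11) and
    hs = (12, 16, 22), which yields the anchors (-3.5, 2, 18.5, 13),
    (0, 0, 15, 15) and (2.5, -3, 12.5, 18).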
85 | """ 86 | 87 | w, h, x_ctr, y_ctr = _whctrs(anchor) 88 | size = w * h 89 | size_ratios = size / ratios 90 | ws = np.round(np.sqrt(size_ratios)) 91 | hs = np.round(ws * ratios) 92 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 93 | return anchors 94 | 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | 108 | if __name__ == '__main__': 109 | import time 110 | 111 | t = time.time() 112 | a = generate_anchors() 113 | print(time.time() - t) 114 | print(a) 115 | from IPython import embed; 116 | 117 | embed() 118 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | import numpy as np 10 | 11 | from detection_opr.utils.bbox_transform import bbox_transform_inv, clip_boxes 12 | from detection_opr.utils.nms_wrapper import nms 13 | import tensorflow as tf 14 | from config import cfg 15 | 16 | 17 | def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, 18 | anchors, num_anchors, is_tfchannel=False): 19 | """A simplified version compared to fast/er RCNN 20 | For details please see the technical report 21 | """ 22 | if type(cfg_key) == bytes: 23 | cfg_key = cfg_key.decode('utf-8') 24 | if cfg_key == 'TRAIN': 25 | pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N 26 | post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N 27 | nms_thresh = cfg.TRAIN.RPN_NMS_THRESH 28 | else: 29 | pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N 30 | post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N 31 | nms_thresh = cfg.TEST.RPN_NMS_THRESH 32 | 33 | im_info = im_info[0] 34 | # from IPython import embed; embed() 35 | # Get the scores and bounding boxes 36 | if is_tfchannel: 37 | scores = rpn_cls_prob.reshape(-1, 2) 38 | scores = scores[:, 1] 39 | else: 40 | scores = rpn_cls_prob[:, :, :, num_anchors:] 41 | rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4)) 42 | # if cfg_key == 'TRAIN' and 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \ 43 | if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \ 44 | and cfg.TRAIN.RPN_NORMALIZE_TARGETS: 45 | rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS 46 | rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS 47 | 48 | scores = scores.reshape((-1, 1)) 49 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 50 | proposals = clip_boxes(proposals, im_info[:2]) 51 | 52 | # filter boxes 53 | min_size = 0 54 | if cfg_key == 'TRAIN': 55 | if 'RPN_MIN_SIZE' in cfg.TRAIN.keys(): 56 | min_size = cfg.TRAIN.RPN_MIN_SIZE 57 | elif cfg_key == 'TEST': 58 | if 'RPN_MIN_SIZE' in cfg.TEST.keys(): 59 | min_size = cfg.TEST.RPN_MIN_SIZE 60 | 61 | if min_size > 0: 62 | keep = _filter_boxes(proposals, min_size * im_info[2]) 63 | proposals = proposals[keep, :] 64 | scores = scores[keep] 65 | 66 | # Pick the top region proposals 67 | order = scores.ravel().argsort()[::-1] 68 | if pre_nms_topN > 0: 69 | order = order[:pre_nms_topN] 70 | proposals = proposals[order, :] 71 | scores = 
scores[order] 72 | 73 | # Non-maximal suppression 74 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 75 | 76 | if post_nms_topN > 0: 77 | keep = keep[:post_nms_topN] 78 | proposals = proposals[keep, :] 79 | scores = scores[keep] 80 | 81 | # Only support single image as input 82 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 83 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 84 | 85 | return blob, scores.flatten() 86 | 87 | 88 | def proposal_without_nms_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, 89 | feat_stride, anchors, num_anchors, 90 | is_tfchannel=False): 91 | if type(cfg_key) == bytes: 92 | cfg_key = cfg_key.decode('utf-8') 93 | 94 | if cfg_key == 'TRAIN': 95 | pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N 96 | else: 97 | pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N 98 | im_info = im_info[0] 99 | # Get the scores and bounding boxes 100 | if is_tfchannel: 101 | scores = rpn_cls_prob.reshape(-1, 2) 102 | scores = scores[:, 1] 103 | else: 104 | scores = rpn_cls_prob[:, :, :, num_anchors:] 105 | rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4)) 106 | 107 | if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \ 108 | and cfg.TRAIN.RPN_NORMALIZE_TARGETS: 109 | rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS 110 | rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS 111 | 112 | scores = scores.reshape((-1, 1)) 113 | proposals = bbox_transform_inv(anchors, rpn_bbox_pred) 114 | proposals = clip_boxes(proposals, im_info[:2]) 115 | 116 | # filter boxes 117 | min_size = 0 118 | if cfg_key == 'TRAIN': 119 | if 'RPN_MIN_SIZE' in cfg.TRAIN.keys(): 120 | min_size = cfg.TRAIN.RPN_MIN_SIZE 121 | elif cfg_key == 'TEST': 122 | if 'RPN_MIN_SIZE' in cfg.TEST.keys(): 123 | min_size = cfg.TEST.RPN_MIN_SIZE 124 | if min_size > 0: 125 | keep = _filter_boxes(proposals, min_size * im_info[2]) 126 | proposals = proposals[keep, :] 127 | scores = scores[keep] 128 | 129 | # Pick the top region proposals 130 | order = scores.ravel().argsort()[::-1] 131 | if pre_nms_topN > 0: 132 | order = order[:pre_nms_topN] 133 | proposals = proposals[order, :] 134 | scores = scores[order].flatten() 135 | 136 | ##why add one, because tf nms assume x2,y2 does not include border 137 | proposals_addone = np.array(proposals) 138 | proposals_addone[:, 2] += 1 139 | proposals_addone[:, 3] += 1 140 | return proposals, scores, proposals_addone 141 | 142 | 143 | def _filter_boxes(boxes, min_size): 144 | """Remove all boxes with any side smaller than min_size.""" 145 | ws = boxes[:, 2] - boxes[:, 0] + 1 146 | hs = boxes[:, 3] - boxes[:, 1] + 1 147 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 148 | return keep 149 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Sean Bell and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | import numpy as np 11 | import numpy.random as npr 12 | from detection_opr.utils.bbox_transform import bbox_transform 13 | from utils.cython_bbox import bbox_overlaps 14 | from config import cfg 15 | from IPython import embed 16 | 17 | def 
proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes): 18 | """ 19 | Assign object detection proposals to ground-truth targets. Produces proposal 20 | classification labels and bounding-box regression targets. 21 | """ 22 | 23 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 24 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 25 | all_rois = rpn_rois 26 | all_scores = rpn_scores 27 | 28 | # Include ground-truth boxes in the set of candidate rois 29 | if cfg.TRAIN.USE_GT: 30 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 31 | all_rois = np.vstack( 32 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 33 | ) 34 | # not sure if it a wise appending, but anyway i am not using it 35 | ones = np.ones((gt_boxes.shape[0]), dtype=gt_boxes.dtype) 36 | all_scores = np.hstack((all_scores, ones)) 37 | 38 | #num_images = 1 39 | #rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 40 | rois_per_image = np.inf if cfg.TRAIN.BATCH_SIZE == -1 else cfg.TRAIN.BATCH_SIZE 41 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 42 | 43 | # Sample rois with classification labels and bounding box regression 44 | # targets 45 | labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois( 46 | all_rois, all_scores, gt_boxes, fg_rois_per_image, 47 | rois_per_image, _num_classes) 48 | 49 | rois = rois.reshape(-1, 5) 50 | roi_scores = roi_scores.reshape(-1) 51 | labels = labels.reshape(-1, 1) 52 | bbox_targets = bbox_targets.reshape(-1, _num_classes * 4) 53 | bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4) 54 | bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32) 55 | 56 | return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights 57 | 58 | 59 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 60 | """Bounding-box regression targets (bbox_target_data) are stored in a 61 | compact form N x (class, tx, ty, tw, th) 62 | 63 | This function expands those targets into the 4-of-4*K representation used 64 | by the network (i.e. only one class has non-zero targets). 
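    For example, with num_classes = 81, a RoI labelled as class 3 has non-zero
    entries only in columns 12:16 of its 4 * 81 wide target row.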
65 | 66 | Returns: 67 | bbox_target (ndarray): N x 4K blob of regression targets 68 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 69 | """ 70 | 71 | clss = bbox_target_data[:, 0] 72 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 73 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 74 | inds = np.where(clss > 0)[0] 75 | for ind in inds: 76 | cls = clss[ind] 77 | start = int(4 * cls) 78 | end = start + 4 79 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 80 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 81 | return bbox_targets, bbox_inside_weights 82 | 83 | 84 | def _compute_targets(ex_rois, gt_rois, labels): 85 | """Compute bounding-box regression targets for an image.""" 86 | 87 | assert ex_rois.shape[0] == gt_rois.shape[0] 88 | assert ex_rois.shape[1] == 4 89 | assert gt_rois.shape[1] == 4 90 | 91 | targets = bbox_transform(ex_rois, gt_rois) 92 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 93 | # Optionally normalize targets by a precomputed mean and stdev 94 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 95 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 96 | return np.hstack( 97 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 98 | 99 | 100 | def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, 101 | rois_per_image, num_classes): 102 | """Generate a random sample of RoIs comprising foreground and background 103 | examples. 104 | """ 105 | # overlaps: (rois x gt_boxes) 106 | overlaps = bbox_overlaps( 107 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), 108 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 109 | gt_assignment = overlaps.argmax(axis=1) 110 | max_overlaps = overlaps.max(axis=1) 111 | labels = gt_boxes[gt_assignment, 4] 112 | 113 | # Select foreground RoIs as those with >= FG_THRESH overlap 114 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] 115 | # Guard against the case when an image has fewer than fg_rois_per_image 116 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 117 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & 118 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 119 | 120 | # Small modification to the original version where we ensure a fixed number 121 | # of regions are sampled 122 | 123 | ''' 124 | if fg_inds.size > 0 and bg_inds.size > 0: 125 | fg_rois_per_image = min(fg_rois_per_image, fg_inds.size) 126 | fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), 127 | replace=False) 128 | bg_rois_per_image = rois_per_image - fg_rois_per_image 129 | to_replace = bg_inds.size < bg_rois_per_image 130 | bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), 131 | replace=to_replace) 132 | elif fg_inds.size > 0: 133 | to_replace = fg_inds.size < rois_per_image 134 | fg_inds = npr.choice(fg_inds, size=int(rois_per_image), 135 | replace=to_replace) 136 | fg_rois_per_image = rois_per_image 137 | elif bg_inds.size > 0: 138 | to_replace = bg_inds.size < rois_per_image 139 | bg_inds = npr.choice(bg_inds, size=int(rois_per_image), 140 | replace=to_replace) 141 | fg_rois_per_image = 0 142 | else: 143 | import pdb 144 | pdb.set_trace() 145 | 146 | ''' 147 | # Guard against the case when an image has fewer than fg_rois_per_image 148 | # foreground RoIs 149 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 150 | # Sample foreground regions without replacement 151 | if fg_inds.size > 0: 152 | fg_inds = npr.choice(fg_inds, 
size=int(fg_rois_per_this_image), replace=False) 153 | # Compute number of background RoIs to take from this image (guarding 154 | # against there being fewer than desired) 155 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 156 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 157 | # Sample background regions without replacement 158 | if bg_inds.size > 0: 159 | bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image), replace=False) 160 | 161 | 162 | # The indices that we're selecting (both fg and bg) 163 | keep_inds = np.append(fg_inds, bg_inds) 164 | 165 | # pad more to ensure a fixed minibatch size 166 | #while keep_inds.shape[0] < rois_per_image: 167 | # gap = np.minimum(len(all_rois), rois_per_image - keep_inds.shape[0]) 168 | # gap_indexes = npr.choice(range(len(all_rois)), size=gap, replace=False) 169 | # keep_inds = np.append(keep_inds, gap_indexes) 170 | 171 | # Select sampled values from various arrays: 172 | labels = labels[keep_inds] 173 | # Clamp labels for the background RoIs to 0 174 | 175 | #*******labels[int(fg_rois_per_image):] = 0 176 | labels[int(fg_rois_per_this_image):] = 0 177 | rois = all_rois[keep_inds] 178 | roi_scores = all_scores[keep_inds] 179 | 180 | bbox_target_data = _compute_targets( 181 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) 182 | 183 | bbox_targets, bbox_inside_weights = \ 184 | _get_bbox_regression_labels(bbox_target_data, num_classes) 185 | 186 | return labels, rois, roi_scores, bbox_targets, bbox_inside_weights 187 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn/snippets.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | # import sys 7 | # sys.path.insert(0, '/unsullied/sharefs/lizeming/lzm_home_large/tf-faster-rfcn-multigpu/lib/') 8 | 9 | import numpy as np 10 | from detection_opr.rpn.generate_anchors import generate_anchors 11 | import tensorflow as tf 12 | 13 | 14 | def generate_anchors_pre( 15 | height, width, feat_stride, anchor_scales=(8, 16, 32), 16 | anchor_ratios=(0.5, 1, 2), base_size=16): 17 | """ A wrapper function to generate anchors given different scales 18 | Also return the number of anchors in variable 'length' 19 | """ 20 | anchors = generate_anchors( 21 | ratios=np.array(anchor_ratios), scales=np.array(anchor_scales), 22 | base_size=base_size) 23 | A = anchors.shape[0] 24 | shift_x = np.arange(0, width) * feat_stride 25 | shift_y = np.arange(0, height) * feat_stride 26 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 27 | shifts = np.vstack( 28 | (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), 29 | shift_y.ravel())).transpose() 30 | K = shifts.shape[0] 31 | # width changes faster, so here it is H, W, C 32 | anchors = anchors.reshape((1, A, 4)) + \ 33 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 34 | anchors = anchors.reshape((K * A, 4)).astype(np.float32, copy=False) 35 | length = np.int32(anchors.shape[0]) 36 | 37 | return anchors, length 38 | 39 | 40 | def generate_anchors_opr( 41 | height, width, feat_stride, anchor_scales=(8, 16, 32), 42 | anchor_ratios=(0.5, 1, 2), base_size=16): 43 | anchors = generate_anchors( 44 | ratios=np.array(anchor_ratios), scales=np.array(anchor_scales), 45 | base_size=base_size) 46 | shift_x = tf.range(width, dtype=np.float32) * feat_stride 47 | shift_y = tf.range(height, dtype=np.float32) * feat_stride 48 | shift_x, shift_y = 
tf.meshgrid(shift_x, shift_y) 49 | shifts = tf.stack( 50 | (tf.reshape(shift_x, (-1, 1)), tf.reshape(shift_y, (-1, 1)), 51 | tf.reshape(shift_x, (-1, 1)), tf.reshape(shift_y, (-1, 1)))) 52 | shifts = tf.transpose(shifts, [1, 0, 2]) 53 | final_anc = tf.constant(anchors.reshape((1, -1, 4)), dtype=np.float32) + \ 54 | tf.transpose(tf.reshape(shifts, (1, -1, 4)), (1, 0, 2)) 55 | return tf.reshape(final_anc, (-1, 4)) 56 | 57 | 58 | if __name__ == '__main__': 59 | import os 60 | os.environ["CUDA_VISIBLE_DEVICES"] = '3' 61 | hehe = generate_anchors_pre(150, 200, 1.0 / 16) 62 | h, w = tf.constant(150, dtype=np.float32), tf.constant(200, dtype=np.float32) 63 | haha = generate_anchors_opr(h, w, 1.0 / 16) 64 | sess = tf.Session() 65 | xixi = sess.run(haha) 66 | print(hehe[0] - xixi) 67 | embed() 68 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn_batched/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengarden/light_head_rcnn/790f94e4e1481fbc403b101a763d4a9df56ee32a/lib/detection_opr/rpn_batched/__init__.py -------------------------------------------------------------------------------- /lib/detection_opr/rpn_batched/anchor_target_layer_without_boxweight.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | from config import cfg 13 | import numpy as np 14 | import numpy.random as npr 15 | from utils.py_faster_rcnn_utils.cython_bbox import bbox_overlaps 16 | from detection_opr.utils.bbox_transform import bbox_transform 17 | 18 | 19 | def anchor_target_layer( 20 | batch_gt_boxes, batch_im_info, _feat_stride, all_anchors, num_anchors, 21 | is_restrict_bg=False): 22 | """Same as the anchor target layer in original Fast/er RCNN """ 23 | # A = num_anchors 24 | # K = total_anchors / num_anchors 25 | 26 | total_anchors = all_anchors.shape[0] 27 | batch = cfg.train_batch_per_gpu 28 | batch_rpn_labels = [] 29 | batch_rpn_bbox_targets = [] 30 | for b_id in range(batch): 31 | im_info = batch_im_info[b_id] 32 | gt_boxes = batch_gt_boxes[b_id][:int(im_info[5])] 33 | 34 | # allow boxes to sit over the edge by a small amount 35 | _allowed_border = 0 36 | 37 | # only keep anchors inside the image 38 | inds_inside = np.where( 39 | (all_anchors[:, 0] >= -_allowed_border) & 40 | (all_anchors[:, 1] >= -_allowed_border) & 41 | (all_anchors[:, 2] < im_info[1] + _allowed_border) & # width 42 | (all_anchors[:, 3] < im_info[0] + _allowed_border) # height 43 | )[0] 44 | 45 | anchors = all_anchors[inds_inside, :] 46 | 47 | # label: 1 is positive, 0 is negative, -1 is dont care 48 | labels = np.empty((len(inds_inside),), dtype=np.float32) 49 | labels.fill(-1) 50 | 51 | # overlaps between the anchors and the gt boxes 52 | overlaps = bbox_overlaps( 53 | np.ascontiguousarray(anchors, dtype=np.float), 54 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 55 | 56 | argmax_overlaps = overlaps.argmax(axis=1) 57 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 58 | gt_argmax_overlaps = overlaps.argmax(axis=0) 59 | 
gt_max_overlaps = overlaps[ 60 | gt_argmax_overlaps, np.arange(overlaps.shape[1])] 61 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 62 | 63 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 64 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 65 | 66 | labels[gt_argmax_overlaps] = 1 67 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 68 | 69 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 70 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 71 | 72 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 73 | fg_inds = np.where(labels == 1)[0] 74 | if len(fg_inds) > num_fg: 75 | disable_inds = npr.choice( 76 | fg_inds, size=(len(fg_inds) - num_fg), replace=False) 77 | labels[disable_inds] = -1 78 | 79 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 80 | if is_restrict_bg: 81 | num_bg = max(num_bg, num_fg * 1.5) 82 | bg_inds = np.where(labels == 0)[0] 83 | if len(bg_inds) > num_bg: 84 | disable_inds = npr.choice( 85 | bg_inds, size=(len(bg_inds) - num_bg), replace=False) 86 | labels[disable_inds] = -1 87 | 88 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 89 | 90 | # map up to original set of anchors 91 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 92 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 93 | 94 | # labels = labels.reshape((1, height, width, A)) 95 | rpn_labels = labels.reshape((-1, 1)) 96 | 97 | # bbox_targets 98 | bbox_targets = bbox_targets.reshape((-1, 4)) 99 | rpn_bbox_targets = bbox_targets 100 | batch_rpn_labels.append(rpn_labels) 101 | batch_rpn_bbox_targets.append(rpn_bbox_targets) 102 | 103 | batch_rpn_labels = np.vstack(batch_rpn_labels) 104 | batch_rpn_bbox_targets = np.vstack(batch_rpn_bbox_targets) 105 | return batch_rpn_labels, batch_rpn_bbox_targets 106 | 107 | 108 | def _unmap(data, count, inds, fill=0): 109 | """ Unmap a subset of item (data) back to the original set of items (of 110 | size count) """ 111 | if len(data.shape) == 1: 112 | ret = np.empty((count,), dtype=np.float32) 113 | ret.fill(fill) 114 | ret[inds] = data 115 | else: 116 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 117 | ret.fill(fill) 118 | ret[inds, :] = data 119 | return ret 120 | 121 | 122 | def _compute_targets(ex_rois, gt_rois): 123 | """Compute bounding-box regression targets for an image.""" 124 | targets = bbox_transform(ex_rois, gt_rois[:, :4]).astype( 125 | np.float32, copy=False) 126 | if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() and cfg.TRAIN.RPN_NORMALIZE_TARGETS: 127 | assert cfg.TRAIN.RPN_NORMALIZE_MEANS is not None 128 | assert cfg.TRAIN.RPN_NORMALIZE_STDS is not None 129 | targets -= cfg.TRAIN.RPN_NORMALIZE_MEANS 130 | targets /= cfg.TRAIN.RPN_NORMALIZE_STDS 131 | return targets 132 | -------------------------------------------------------------------------------- /lib/detection_opr/rpn_batched/proposal_opr.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: zeming li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | from IPython import embed 8 | from config import cfg 9 | from detection_opr.box_utils.bbox_transform_opr import bbox_transform_inv, \ 10 | clip_boxes 11 | 12 | import tensorflow as tf 13 | import numpy as np 14 | 15 | 16 | def filter_boxes(boxes, min_size): 17 | """Remove all boxes with any side smaller than min_size.""" 18 | ws = boxes[:, 2] - boxes[:, 0] + 1 19 | hs = boxes[:, 3] - boxes[:, 1] + 1 20 | keep = tf.where((ws >= min_size) & (hs >= 
min_size))[:, 0] 21 | return keep 22 | 23 | 24 | def proposal_opr( 25 | rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, 26 | num_anchors, is_tfchannel=False, is_tfnms=False): 27 | """ Proposal_layer with tensors 28 | """ 29 | if type(cfg_key) == bytes: 30 | cfg_key = cfg_key.decode('utf-8') 31 | 32 | if cfg_key == 'TRAIN': 33 | pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N 34 | post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N 35 | nms_thresh = cfg.TRAIN.RPN_NMS_THRESH 36 | batch = cfg.train_batch_per_gpu 37 | else: 38 | pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N 39 | post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N 40 | nms_thresh = cfg.TEST.RPN_NMS_THRESH 41 | batch = cfg.test_batch_per_gpu 42 | 43 | if is_tfchannel: 44 | scores = tf.reshape(rpn_cls_prob, (batch, -1, 2)) 45 | scores = scores[:, :, 1] 46 | rpn_bbox_pred = tf.reshape(rpn_bbox_pred, (batch, -1, 4)) 47 | else: 48 | from IPython import embed 49 | print("other channel type not implemented") 50 | embed() 51 | 52 | if 'RPN_NORMALIZE_TARGETS' in cfg.TRAIN.keys() \ 53 | and cfg.TRAIN.RPN_NORMALIZE_TARGETS: 54 | rpn_bbox_pred *= cfg.TRAIN.RPN_NORMALIZE_STDS 55 | rpn_bbox_pred += cfg.TRAIN.RPN_NORMALIZE_MEANS 56 | 57 | min_size = 0 58 | if cfg_key == 'TRAIN': 59 | if 'RPN_MIN_SIZE' in cfg.TRAIN.keys(): 60 | min_size = cfg.TRAIN.RPN_MIN_SIZE 61 | elif cfg_key == 'TEST': 62 | if 'RPN_MIN_SIZE' in cfg.TEST.keys(): 63 | min_size = cfg.TEST.RPN_MIN_SIZE 64 | 65 | batch_scores = [] 66 | batch_proposals = [] 67 | for b_id in range(batch): 68 | cur_im_info = im_info[b_id] 69 | cur_scores = scores[b_id] 70 | cur_rpn_bbox_pred = rpn_bbox_pred[b_id] 71 | 72 | cur_scores = tf.squeeze(tf.reshape(cur_scores, (-1, 1)), axis=1) 73 | cur_proposals = bbox_transform_inv(anchors, cur_rpn_bbox_pred) 74 | cur_proposals = clip_boxes(cur_proposals, cur_im_info[:2]) 75 | 76 | if min_size > 0: 77 | # NOTE: a non-zero MIN_SIZE makes this mode slow because of the tf.where opr 78 | keep = filter_boxes(cur_proposals, min_size * cur_im_info[2]) 79 | cur_proposals = tf.gather(cur_proposals, keep, axis=0) 80 | cur_scores = tf.gather(cur_scores, keep, axis=0) 81 | 82 | if pre_nms_topN > 0: 83 | cur_order = tf.nn.top_k(cur_scores, pre_nms_topN, sorted=True)[1] 84 | cur_proposals = tf.gather(cur_proposals, cur_order, axis=0) 85 | cur_scores = tf.gather(cur_scores, cur_order, axis=0) 86 | 87 | if is_tfnms: 88 | tf_proposals = cur_proposals + np.array([0, 0, 1, 1]) 89 | keep = tf.image.non_max_suppression( 90 | tf_proposals, cur_scores, post_nms_topN, nms_thresh) 91 | else: 92 | from lib_kernel.lib_fast_nms import nms_op 93 | keep, keep_num, mask, _ = nms_op.nms( 94 | cur_proposals, nms_thresh, post_nms_topN) 95 | keep = keep[:keep_num[0]] 96 | 97 | cur_proposals = tf.gather(cur_proposals, keep, axis=0) 98 | cur_scores = tf.gather(cur_scores, keep, axis=0) 99 | 100 | batch_inds = tf.ones((tf.shape(cur_proposals)[0], 1)) * b_id 101 | rois = tf.concat((batch_inds, cur_proposals), axis=1) 102 | batch_proposals.append(rois) 103 | batch_scores.append(cur_scores) 104 | 105 | final_proposals = tf.concat(batch_proposals, axis=0) 106 | final_scores = tf.concat(batch_scores, axis=0) 107 | return final_proposals, final_scores 108 | 109 | def debug_single(x, y): 110 | from IPython import embed 111 | embed() 112 | return True 113 | -------------------------------------------------------------------------------- /lib/detection_opr/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 |
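A note on the numpy encode/decode pair defined in `lib/detection_opr/utils/bbox_transform.py` just below: `bbox_transform` uses the end-point-inclusive `+ 1` width convention, while `bbox_transform_inv` decodes with symmetric `± 0.5 * w` offsets, so a strict round trip recovers `x1, y1` exactly but returns `x2, y2` shifted by one pixel. A minimal sketch (assuming `lib/` is on `PYTHONPATH`):

```
import numpy as np
from detection_opr.utils.bbox_transform import bbox_transform, bbox_transform_inv

anchors = np.array([[10., 10., 59., 89.]])    # boxes as (x1, y1, x2, y2)
gt = np.array([[12., 14., 65., 91.]])

deltas = bbox_transform(anchors, gt)          # (dx, dy, dw, dh) regression targets
decoded = bbox_transform_inv(anchors, deltas)

assert np.allclose(decoded[:, :2], gt[:, :2])        # x1, y1 come back exactly
assert np.allclose(decoded[:, 2:], gt[:, 2:] + 1.0)  # x2, y2 carry the +1 convention
```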
-------------------------------------------------------------------------------- /lib/detection_opr/utils/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | def bbox_transform(ex_rois, gt_rois): 14 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 15 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 16 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 17 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 18 | 19 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 20 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 21 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 22 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 23 | 24 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 25 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 26 | targets_dw = np.log(gt_widths / ex_widths) 27 | targets_dh = np.log(gt_heights / ex_heights) 28 | 29 | targets = np.vstack( 30 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 31 | return targets 32 | 33 | 34 | def bbox_transform_inv(boxes, deltas): 35 | if boxes.shape[0] == 0: 36 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 37 | 38 | boxes = boxes.astype(deltas.dtype, copy=False) 39 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 40 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 41 | ctr_x = boxes[:, 0] + 0.5 * widths 42 | ctr_y = boxes[:, 1] + 0.5 * heights 43 | 44 | dx = deltas[:, 0::4] 45 | dy = deltas[:, 1::4] 46 | dw = deltas[:, 2::4] 47 | dh = deltas[:, 3::4] 48 | 49 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 50 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 51 | pred_w = np.exp(dw) * widths[:, np.newaxis] 52 | pred_h = np.exp(dh) * heights[:, np.newaxis] 53 | 54 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 55 | # x1 56 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 57 | # y1 58 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 59 | # x2 60 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 61 | # y2 62 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 63 | 64 | return pred_boxes 65 | 66 | 67 | def clip_boxes(boxes, im_shape): 68 | """ 69 | Clip boxes to image boundaries. 
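    Note: `boxes` is modified in place; `im_shape` is (height, width).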
70 | """ 71 | # x1 >= 0 72 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 73 | # y1 >= 0 74 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 75 | # x2 < im_shape[1] 76 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 77 | # y2 < im_shape[0] 78 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 79 | return boxes 80 | 81 | -------------------------------------------------------------------------------- /lib/detection_opr/utils/loss_opr.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def _smooth_l1_loss_base(bbox_pred, bbox_targets, bbox_inside_weights, 11 | bbox_outside_weights, sigma=1.0, dim=[1]): 12 | sigma_2 = sigma ** 2 13 | box_diff = bbox_pred - bbox_targets 14 | in_box_diff = bbox_inside_weights * box_diff 15 | abs_in_box_diff = tf.abs(in_box_diff) 16 | smoothL1_sign = tf.stop_gradient( 17 | tf.to_float(tf.less(abs_in_box_diff, 1. / sigma_2))) 18 | in_loss_box = tf.pow(in_box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \ 19 | + (abs_in_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign) 20 | out_loss_box = bbox_outside_weights * in_loss_box 21 | return out_loss_box 22 | 23 | 24 | def softmax_layer(bottom, name): 25 | if name == 'rpn_cls_prob_reshape': 26 | input_shape = tf.shape(bottom) 27 | bottom_reshaped = tf.reshape(bottom, [-1, input_shape[-1]]) 28 | reshaped_score = tf.nn.softmax(bottom_reshaped, name=name) 29 | return tf.reshape(reshaped_score, input_shape) 30 | return tf.nn.softmax(bottom, name=name) 31 | 32 | 33 | def softmax_loss_ohem(cls_score, label, nr_ohem_sampling): 34 | cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 35 | logits=cls_score, labels=label) 36 | topk_val, topk_idx = tf.nn.top_k(cls_loss, k=nr_ohem_sampling, 37 | sorted=False, name='ohem_cls_loss_index') 38 | cls_loss_ohem = tf.gather(cls_loss, topk_idx, name='ohem_cls_loss') 39 | cls_loss_ohem = tf.reduce_sum(cls_loss_ohem) / nr_ohem_sampling 40 | return cls_loss_ohem 41 | 42 | 43 | # rpn do not direct div norm 44 | # 算loss 不算背景loss 45 | 46 | def smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, 47 | bbox_outside_weights, sigma=1.0, dim=[1]): 48 | value = _smooth_l1_loss_base( 49 | bbox_pred, bbox_targets, bbox_inside_weights, 50 | bbox_outside_weights, sigma=sigma, dim=[1]) 51 | loss = tf.reduce_mean(tf.reduce_sum(value, axis=dim)) 52 | return loss 53 | 54 | 55 | def smooth_l1_loss_ohem(bbox_pred, bbox_targets, bbox_inside_weights, 56 | bbox_outside_weights, nr_ohem_sampling, 57 | sigma=1.0, dim=[1]): 58 | box_loss_base = _smooth_l1_loss_base( 59 | bbox_pred, bbox_targets, bbox_inside_weights, 60 | bbox_outside_weights, sigma=sigma, dim=[1]) 61 | box_loss = tf.reduce_sum(box_loss_base, axis=dim) 62 | 63 | topk_val, topk_idx = tf.nn.top_k( 64 | box_loss, k=nr_ohem_sampling, 65 | sorted=False, name='ohem_box_loss_index') 66 | 67 | box_loss_ohem = tf.gather(box_loss, topk_idx, name='ohem_box_loss') 68 | box_loss_ohem = tf.reduce_sum(box_loss_ohem) / nr_ohem_sampling 69 | return box_loss_ohem 70 | 71 | 72 | def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets, 73 | bbox_inside_weights, bbox_outside_weights, 74 | nr_ohem_sampling, sigma=1.0, dim=[1]): 75 | cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 76 | logits=cls_score, labels=label) 77 | box_loss_base = 
_smooth_l1_loss_base(bbox_pred, bbox_targets, 78 | bbox_inside_weights, 79 | bbox_outside_weights, 80 | sigma=sigma, dim=[1]) 81 | 82 | box_loss = tf.reduce_sum(box_loss_base, axis=dim) 83 | cls_box_loss = cls_loss + box_loss 84 | 85 | nr_ohem_sampling = tf.minimum(nr_ohem_sampling, 86 | tf.shape(cls_box_loss)[0]) 87 | 88 | topk_val, topk_idx = tf.nn.top_k(cls_box_loss, k=nr_ohem_sampling, 89 | sorted=True, name='ohem_loss_index') 90 | 91 | cls_loss_ohem = tf.gather(cls_loss, topk_idx, name='ohem_cls_loss') 92 | box_loss_ohem = tf.gather(box_loss, topk_idx, name='ohem_box_loss') 93 | 94 | box_loss_ohem = tf.reduce_sum(box_loss_ohem) / \ 95 | tf.to_float(nr_ohem_sampling) 96 | cls_norm = tf.stop_gradient(tf.minimum(nr_ohem_sampling, 97 | tf.shape(topk_val)[0])) 98 | 99 | # db_cls_norm = tf.py_func(debug_single, [cls_loss, box_loss, topk_idx, 100 | # cls_loss_ohem, box_loss_ohem, cls_norm], [tf.bool]) 101 | # with tf.control_dependencies(db_cls_norm): 102 | cls_loss_ohem = tf.reduce_sum(cls_loss_ohem) / tf.to_float(cls_norm) 103 | 104 | return cls_loss_ohem, box_loss_ohem 105 | 106 | 107 | '''following are not tested''' 108 | 109 | 110 | def debug_single(x): 111 | from IPython import embed 112 | embed() 113 | return True 114 | 115 | 116 | def debug_two(x, y): 117 | from IPython import embed 118 | embed() 119 | return True 120 | 121 | 122 | def debug_four(cls_loss, box_loss, topk_idx, 123 | cls_loss_ohem, box_loss_ohem, cls_norm): 124 | from IPython import embed 125 | embed() 126 | return True 127 | 128 | 129 | def _get_mask_of_label(label, background, ignore_label): 130 | mask_fg = 1 - tf.to_float(tf.equal(label, background)) 131 | mask_ig = 1 - tf.to_float(tf.equal(label, ignore_label)) 132 | # mask_fg = 1 - label.eq(background) 133 | # mask_ig = 1 - label.eq(ignore_label) 134 | mask = mask_fg * mask_ig 135 | return mask, mask_ig 136 | 137 | 138 | def smooth_l1_loss_valid(bbox_pred, bbox_targets, bbox_inside_weights, 139 | bbox_outside_weights, label, 140 | background=0, ignore_label=-1, 141 | sigma=1.0, dim=[1]): 142 | value = _smooth_l1_loss_base(bbox_pred, bbox_targets, bbox_inside_weights, 143 | bbox_outside_weights, sigma, dim=[1]) 144 | mask, mask_ig = _get_mask_of_label(label, background, ignore_label) 145 | norm = tf.maximum(1, tf.reduce_sum(mask_ig)) 146 | loss = tf.reduce_sum(value, dim) / norm 147 | return loss 148 | -------------------------------------------------------------------------------- /lib/detection_opr/utils/loss_opr_without_box_weight.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | 7 | import tensorflow as tf 8 | 9 | 10 | def _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=1.0): 11 | sigma_2 = sigma ** 2 12 | box_diff = bbox_pred - bbox_targets 13 | abs_box_diff = tf.abs(box_diff) 14 | smoothL1_sign = tf.stop_gradient( 15 | tf.to_float(tf.less(abs_box_diff, 1. 
/ sigma_2))) 16 | loss_box = tf.pow(box_diff, 2) * (sigma_2 / 2.0) * smoothL1_sign \ 17 | + (abs_box_diff - (0.5 / sigma_2)) * (1.0 - smoothL1_sign) 18 | return loss_box 19 | 20 | 21 | def smooth_l1_loss_rpn(bbox_pred, bbox_targets, label, sigma=1.0): 22 | value = _smooth_l1_loss_base(bbox_pred, bbox_targets, sigma=sigma) 23 | value = tf.reduce_sum(value, axis=1) 24 | # rpn_select = tf.where(tf.equal(label, 1)) 25 | rpn_select = tf.where(tf.greater(label, 0)) 26 | value_select = tf.gather(value, rpn_select) 27 | mask_ig = tf.stop_gradient( 28 | 1.0 - tf.to_float(tf.equal(label, -1))) 29 | bbox_loss = tf.reduce_sum(value_select) / \ 30 | tf.maximum(1.0, tf.reduce_sum(mask_ig)) 31 | return bbox_loss 32 | 33 | 34 | def smooth_l1_loss_rcnn(bbox_pred, bbox_targets, label, nr_classes, sigma=1.0): 35 | out_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0))) 36 | 37 | bbox_pred = tf.reshape(bbox_pred, [-1, nr_classes, 4]) 38 | bbox_targets = tf.reshape(bbox_targets, [-1, nr_classes, 4]) 39 | 40 | value = _smooth_l1_loss_base( 41 | bbox_pred, bbox_targets, sigma=sigma) 42 | value = tf.reduce_sum(value, 2) 43 | value = tf.reshape(value, [-1, nr_classes]) 44 | 45 | inner_mask = tf.one_hot( 46 | tf.reshape(label, (-1, 1)), depth=nr_classes, axis=1) 47 | inner_mask = tf.stop_gradient( 48 | tf.to_float(tf.reshape(inner_mask, [-1, nr_classes]))) 49 | 50 | bbox_loss = tf.reduce_sum(tf.reduce_sum(value * inner_mask, 1) * out_mask) \ 51 | / tf.to_float((tf.shape(bbox_pred)[0])) 52 | return bbox_loss 53 | 54 | 55 | def sum_ohem_loss(cls_score, label, bbox_pred, bbox_targets, 56 | nr_ohem_sampling, nr_classes, sigma=1.0): 57 | cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 58 | logits=cls_score, labels=label) 59 | 60 | out_mask = tf.stop_gradient(tf.to_float(tf.greater(label, 0))) 61 | bbox_pred = tf.reshape(bbox_pred, [-1, nr_classes, 4]) 62 | bbox_targets = tf.reshape(bbox_targets, [-1, nr_classes, 4]) 63 | value = _smooth_l1_loss_base( 64 | bbox_pred, bbox_targets, sigma=sigma) 65 | value = tf.reduce_sum(value, 2) 66 | value = tf.reshape(value, [-1, nr_classes]) 67 | inner_mask = tf.one_hot(tf.reshape(label, (-1, 1)), depth=nr_classes, 68 | axis=1) 69 | inner_mask = tf.stop_gradient( 70 | tf.to_float(tf.reshape(inner_mask, [-1, nr_classes]))) 71 | box_loss = tf.reduce_sum(value * inner_mask, 1) * out_mask 72 | 73 | cls_box_loss = cls_loss + box_loss 74 | nr_ohem_sampling = tf.minimum(nr_ohem_sampling, 75 | tf.shape(cls_box_loss)[0]) 76 | 77 | topk_val, topk_idx = tf.nn.top_k(cls_box_loss, k=nr_ohem_sampling, 78 | sorted=True, name='ohem_loss_index') 79 | 80 | cls_loss_ohem = tf.gather(cls_loss, topk_idx, name='ohem_cls_loss') 81 | box_loss_ohem = tf.gather(box_loss, topk_idx, name='ohem_box_loss') 82 | 83 | box_loss_ohem = tf.reduce_sum(box_loss_ohem) / tf.to_float(nr_ohem_sampling) 84 | cls_norm = tf.stop_gradient(tf.minimum(nr_ohem_sampling, 85 | tf.shape(topk_val)[0])) 86 | cls_loss_ohem = tf.reduce_sum(cls_loss_ohem) / tf.to_float(cls_norm) 87 | 88 | return cls_loss_ohem, box_loss_ohem 89 | 90 | 91 | def smooth_l1_loss_ohem(bbox_pred, bbox_targets, nr_ohem_sampling, sigma=1.0): 92 | value = _smooth_l1_loss_base( 93 | bbox_pred, bbox_targets, sigma=sigma) 94 | box_loss = tf.reduce_sum(value, axis=1) 95 | 96 | topk_val, topk_idx = tf.nn.top_k( 97 | box_loss, k=nr_ohem_sampling, 98 | sorted=False, name='ohem_box_loss_index') 99 | box_loss_ohem = tf.gather(box_loss, topk_idx, name='ohem_box_loss') 100 | box_loss_ohem = tf.reduce_sum(box_loss_ohem) / nr_ohem_sampling 101 | return 
box_loss_ohem 102 | 103 | 104 | def softmax_loss_ohem(cls_score, label, nr_ohem_sampling): 105 | cls_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 106 | logits=cls_score, labels=label) 107 | topk_val, topk_idx = tf.nn.top_k(cls_loss, k=nr_ohem_sampling, 108 | sorted=False, name='ohem_cls_loss_index') 109 | cls_loss_ohem = tf.gather(cls_loss, topk_idx, name='ohem_cls_loss') 110 | cls_loss_ohem = tf.reduce_sum(cls_loss_ohem) / nr_ohem_sampling 111 | return cls_loss_ohem 112 | 113 | 114 | def focus_loss( 115 | prob, label, gamma=2.0, alpha=0.25, is_make_onehot=True, nr_cls=None): 116 | if is_make_onehot: 117 | label = tf.one_hot(label, depth=nr_cls) 118 | pt = tf.reduce_sum(tf.to_float(label) * tf.to_float(prob), axis=1) 119 | loss = -1.0 * alpha * tf.pow(1 - pt, gamma) * tf.log(pt + 1e-14) 120 | return loss 121 | -------------------------------------------------------------------------------- /lib/detection_opr/utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | from config import cfg 11 | from nms.gpu_nms import gpu_nms 12 | from nms.cpu_nms import cpu_nms, cpu_soft_nms 13 | import numpy as np 14 | 15 | def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1): 16 | 17 | keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32), 18 | np.float32(sigma), np.float32(Nt), 19 | np.float32(threshold), 20 | np.uint8(method)) 21 | return keep 22 | 23 | 24 | def nms(dets, thresh, force_cpu=False): 25 | """Dispatch to either CPU or GPU NMS implementations.""" 26 | 27 | if dets.shape[0] == 0: 28 | return [] 29 | if not force_cpu: 30 | #return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 31 | return gpu_nms(dets, thresh) 32 | else: 33 | return cpu_nms(dets, thresh) 34 | -------------------------------------------------------------------------------- /lib/detection_opr/utils/vis_det.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | @author: jemmy li 4 | @contact: zengarden2009@gmail.com 5 | """ 6 | import matplotlib.pyplot as plt 7 | import random 8 | import numpy as np 9 | import cv2 10 | 11 | 12 | def visualize_detection(img, dets, is_show_label=True, classes=None, 13 | thresh=0.5): 14 | """ 15 | visualize detections in one image 16 | 17 | Parameters: 18 | ---------- 19 | img : numpy.array image, in bgr format 20 | dets : numpy.array ssd detections, 21 | numpy.array([[x1, y1, x2, y2, score, cls_id]...]) 22 | classes : tuple or list of str class names 23 | thresh : float, score threshold 24 | """ 25 | plt.imshow(img) 26 | colors = dict() 27 | for det in dets: 28 | bb = det[:4].astype(int) 29 | 30 | if is_show_label: 31 | cls_id = int(det[5]) 32 | score = det[4] 33 | if cls_id == 0: 34 | continue 35 | if score > thresh: 36 | if cls_id not in colors: 37 | colors[cls_id] = (random.random(), random.random(), 38 | random.random()) 39 | rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0], 40 | bb[3] - bb[1], fill=False, 41 | edgecolor=colors[cls_id], 42 | linewidth=3.5) 43 | plt.gca().add_patch(rect) 44 | if classes and len(classes) > cls_id: 45 | cls_name = classes[cls_id] 46 | 
                else:
47 |                     cls_name = str(cls_id)
48 |                 plt.gca().text(bb[0], bb[1] - 2,
49 |                                '{:s} {:.3f}'.format(cls_name, score),
50 |                                bbox=dict(facecolor=colors[cls_id], alpha=0.5),
51 |                                fontsize=12, color='white')
52 |         else:
53 |             rect = plt.Rectangle((bb[0], bb[1]), bb[2] - bb[0],
54 |                                  bb[3] - bb[1], fill=False,
55 |                                  edgecolor=(1, 0, 0),
56 |                                  linewidth=3.5)
57 |             plt.gca().add_patch(rect)
58 |     plt.show()
59 | 
60 | 
61 | # visualize_old: use opencv api
62 | def visualize_detection_old(img, dets, is_show_label=True, classes=None,
63 |                             thresh=0.5, name='detection'):
64 |     """
65 |     visualize detections in one image
66 | 
67 |     Parameters:
68 |     ----------
69 |     img : numpy.array image, in bgr format
70 |     dets : numpy.array ssd detections,
71 |            numpy.array([[x1, y1, x2, y2, score, cls_id]...])
72 |     classes : tuple or list of str class names
73 |     thresh : float, score threshold
74 |     """
75 |     im = np.array(img)
76 |     colors = dict()
77 |     font = cv2.FONT_HERSHEY_SIMPLEX
78 | 
79 |     for det in dets:
80 |         bb = det[:4].astype(int)
81 |         if is_show_label:
82 |             cls_id = int(det[5])
83 |             score = det[4]
84 | 
85 |             if cls_id == 0:
86 |                 continue
87 |             if score > thresh:
88 |                 if cls_id not in colors:
89 |                     colors[cls_id] = (
90 |                         random.random() * 255, random.random() * 255,
91 |                         random.random() * 255)
92 | 
93 |                 cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]),
94 |                               colors[cls_id], 3)
95 | 
96 |                 if classes and len(classes) > cls_id:
97 |                     cls_name = classes[cls_id]
98 |                 else:
99 |                     cls_name = str(cls_id)
100 |                 cv2.putText(im, '{:s} {:.3f}'.format(cls_name, score),
101 |                             (bb[0], bb[1] - 2),
102 |                             font, 0.5, colors[cls_id], 1)
103 |         else:
104 |             cv2.rectangle(im, (bb[0], bb[1]), (bb[2], bb[3]), (0, 0, 255), 2)
105 | 
106 |     cv2.imshow(name, im)
107 |     while True:
108 |         c = cv2.waitKey(100000)
109 |         if c == ord('d'):
110 |             return
111 |         elif c == ord('n'):
112 |             break
113 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/__init__.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | 
4 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/make.sh: --------------------------------------------------------------------------------
1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
2 | TF_LIB=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
3 | CUDA_PATH=/usr/local/cuda/
4 | 
5 | nvcc -std=c++11 -c -o nms_op.cu.o nms_op.cu.cc \
6 |     -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
7 | 
8 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below
9 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o nms.so nms_op.cc \
10 |     nms_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public
11 | 
12 | # for gcc5-built tf
13 | #g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=1 -o roi_pooling.so roi_pooling_op.cc \
14 | #    roi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64
15 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/nms_op.cc: --------------------------------------------------------------------------------
1 | /*
2 | @author: zeming li
3 | @contact: zengarden2009@gmail.com
4 | */
5 | #include <cstdio>
6 | #include <cfloat>
7 | #include <iostream>
8 | 
9 | 
10 | #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
11 | #include "tensorflow/core/framework/op.h"
12 | #include "tensorflow/core/framework/op_kernel.h"
13 | #include "tensorflow/core/framework/tensor_shape.h"
14 | #include "tensorflow/core/framework/shape_inference.h"
15 | 
16 | using namespace tensorflow;
17 | typedef Eigen::ThreadPoolDevice CPUDevice;
18 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
19 | 
20 | 
21 | // .Input("nms_overlap_thresh: float")
22 | // .Input("max_out: int32")
23 | REGISTER_OP("NMS")
24 |     .Attr("T: {float, double}")
25 |     .Attr("nms_overlap_thresh: float")
26 |     .Attr("max_out: int")
27 |     .Input("boxes: T")
28 |     .Output("keep_out: int32")
29 |     .Output("num_keep_out: int32")
30 |     .Output("mask_dev: uint64");
31 | 
32 | 
33 | template <typename Device, typename T>
34 | class NMSOp : public OpKernel {
35 |  public:
36 |   explicit NMSOp(OpKernelConstruction* context) : OpKernel(context) {}
37 |   void Compute(OpKernelContext* context) override {}
38 |  private:
39 |   float nms_overlap_thresh;
40 |   int max_out;
41 | };
42 | 
43 | void NMSForward(const float* box_ptr, const int num_box,
44 |                 unsigned long long * mask_ptr,
45 |                 int* output_ptr,
46 |                 int* output_num_ptr,
47 |                 float iou_threshold,
48 |                 int max_output,
49 |                 const Eigen::GpuDevice& d);
50 | 
51 | // const Tensor* bottom_rois, const Tensor* argmax_data,
52 | // const Tensor* out_backprop, const float spatial_scale, const int batch_size,
53 | // const int num_rois, const int height, const int width, const int channels,
54 | // const int pooled_height, const int pooled_width,
55 | // const int sample_height, const int sample_width,
56 | // const TensorShape& tensor_output_shape)
57 | 
58 | // static void NmsOpKernel(
59 | //     OpKernelContext* context, const int num_box, const Tensor* boxes,
60 | //     const float nms_overlap_thresh, const int max_out,
61 | //     Tensor* keep_out, Tensor * mask_dev)
62 | // {
63 | //   if (!context->status().ok()) {
64 | //     return;
65 | //   }
66 | //   std::cout << "dbg: 0" << std::endl;
67 | //   std::cout << mask_dev->flat<uint64>().data() << std::endl;
68 | //   int * haha = keep_out->flat<int>().data();
69 | //   haha[0] = 1;
70 | //   NMSForward(
71 | //       num_box,
72 | //       boxes->flat<float>().data(),
73 | //       nms_overlap_thresh,
74 | //       max_out,
75 | //       keep_out->flat<int>().data(),
76 | //       mask_dev->flat<uint64>().data(),
77 | //       context->eigen_device<Eigen::GpuDevice>());
78 | // }
79 | 
80 | static void NmsOpKernel(
81 |     OpKernelContext* context, const Tensor* boxes, const int num_box,
82 |     TensorShape& mask_keep_shape,
83 |     TensorShape& keep_output_shape, TensorShape& num_keep_output_shape,
84 |     const float nms_overlap_thresh, const int max_out) {
85 |   Tensor* keep_out = nullptr;
86 |   Tensor* num_keep_out = nullptr;
87 |   Tensor* mask_keep = nullptr;
88 | 
89 | 
90 | 
91 | 
92 |   OP_REQUIRES_OK(context, context->allocate_output(0, keep_output_shape, &keep_out));
93 | 
94 |   OP_REQUIRES_OK(context, context->allocate_output(1, num_keep_output_shape, &num_keep_out));
95 |   OP_REQUIRES_OK(context, context->allocate_output(2, mask_keep_shape, &mask_keep));
96 | 
97 |   if (!context->status().ok()) {
98 |     return;
99 |   }
100 |   NMSForward(
101 |       boxes->flat<float>().data(),
102 |       num_box,
103 |       mask_keep->flat<uint64>().data(),
104 |       keep_out->flat<int>().data(),
105 |       num_keep_out->flat<int>().data(),
106 |       nms_overlap_thresh,
107 |       max_out,
108 |       context->eigen_device<Eigen::GpuDevice>());
109 | }
110 | 
111 | 
112 | template <typename T>
113 | class NMSOp<Eigen::GpuDevice, T> : public OpKernel {
114 |  public:
115 |   typedef Eigen::GpuDevice Device;
116 |   explicit NMSOp(OpKernelConstruction* context) : OpKernel(context) {
117 |     OP_REQUIRES_OK(context,
118 |                    context->GetAttr("nms_overlap_thresh", &nms_overlap_thresh));
119 |     OP_REQUIRES_OK(context,
120 |                    context->GetAttr("max_out", &max_out));
121 |   }
122 | 
123 |   void Compute(OpKernelContext* context) override {
124 |     // Get input tensor
125 |     const Tensor& det = context->input(0);
126 |     // const float nms_overlap_thresh = context->input(1);
127 |     // const int max_out = context->input(2);
128 |     int num_box = det.dim_size(0);
129 |     int box_dim = det.dim_size(1);
130 | 
131 |     OP_REQUIRES(context, det.dims() == 2,
132 |                 errors::InvalidArgument("det must be 2-dimensional"));
133 | 
134 |     //create output tensor
135 | 
136 |     int dim_keep_out[1];
137 |     dim_keep_out[0] = max_out;
138 |     TensorShape keep_output_shape;
139 |     TensorShapeUtils::MakeShape(dim_keep_out, 1, &keep_output_shape);
140 | 
141 |     int dim_mask_keep[1];
142 |     dim_mask_keep[0] = num_box * DIVUP(num_box, sizeof(unsigned long long) * 8);
143 |     TensorShape mask_keep_shape;
144 |     TensorShapeUtils::MakeShape(dim_mask_keep, 1, &mask_keep_shape);
145 | 
146 |     int dim_num_keep_out[1];
147 |     dim_num_keep_out[0] = 1;
148 |     TensorShape num_keep_output_shape;
149 |     TensorShapeUtils::MakeShape(dim_num_keep_out, 1,
150 |                                 &num_keep_output_shape);
151 | 
152 | 
153 | 
154 | 
155 |     // std::cout<< "mask_keep type " << output_values[0] << std::endl;
156 |     if (!context->status().ok()) {
157 |       return;
158 |     }
159 |     NmsOpKernel(
160 |         context, &det, num_box, mask_keep_shape,
161 |         keep_output_shape, num_keep_output_shape,
162 |         nms_overlap_thresh, max_out);
163 |   }
164 |  private:
165 |   float nms_overlap_thresh;
166 |   int max_out;
167 | };
168 | 
169 | 
170 | //REGISTER_KERNEL_BUILDER(Name("NMSOp").Device(DEVICE_GPU), NMSOp);
171 | REGISTER_KERNEL_BUILDER(Name("NMS").Device(DEVICE_GPU).TypeConstraint<float>("T"), NMSOp<Eigen::GpuDevice, float>);
172 | // #endif
173 | // REGISTER_KERNEL_BUILDER(Name("NMSOp").Device(DEVICE_CPU), NMSOp);
174 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/nms_op.cu.cc: --------------------------------------------------------------------------------
1 | /*
2 | @author: zeming li
3 | @contact: zengarden2009@gmail.com
4 | */
5 | 
6 | #if GOOGLE_CUDA
7 | #define EIGEN_USE_GPU
8 | 
9 | #include <iostream>
10 | #include <vector>
11 | #include "nms_op.h"
12 | using std::vector;
13 | using namespace tensorflow;
14 | #define CUDA_CHECK(condition) \
15 |   /* Code block avoids redefinition of cudaError_t error */ \
16 |   do { \
17 |     cudaError_t error = condition; \
18 |     if (error != cudaSuccess) { \
19 |       std::cout << cudaGetErrorString(error) << std::endl; \
20 |     } \
21 |   } while (0)
22 | 
23 | __device__ inline float devIoU(float const * const a, float const * const b) {
24 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
25 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
26 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
27 |   float interS = width * height;
28 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
29 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
30 |   return interS / (Sa + Sb - interS);
31 | }
32 | 
33 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
34 |                            const float *dev_boxes,
35 |                            unsigned long long *dev_mask) {
36 |   const int row_start = blockIdx.y;
37 |   const int col_start = blockIdx.x;
38 | 
39 |   // if (row_start > col_start) return;
40 | 
41 |   const int row_size =
42 |       min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 |   const int col_size =
44 |       min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 | 
46 |   __shared__ float block_boxes[threadsPerBlock * 5];
47 |   if (threadIdx.x < col_size) {
48 |     block_boxes[threadIdx.x * 5 + 0] =
49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 |     block_boxes[threadIdx.x * 5 + 1] =
51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 |     block_boxes[threadIdx.x * 5 + 2] =
53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 |     block_boxes[threadIdx.x * 5 + 3] =
55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 |     block_boxes[threadIdx.x * 5 + 4] =
57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 |   }
59 |   __syncthreads();
60 | 
61 |   if (threadIdx.x < row_size) {
62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
64 |     int i = 0;
65 |     unsigned long long t = 0;
66 |     int start = 0;
67 |     if (row_start == col_start) {
68 |       start = threadIdx.x + 1;
69 |     }
70 |     for (i = start; i < col_size; i++) {
71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 |         t |= 1ULL << i;
73 |       }
74 |     }
75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 |   }
78 | }
79 | 
80 | // void NMSForward(const int num_box,
81 | //                 const float* boxes,
82 | //                 const float nms_overlap_thresh,
83 | //                 const int max_out,
84 | //                 int* keep_out,
85 | //                 unsigned long long * mask_dev,
86 | //                 const Eigen::GpuDevice& d) {
87 | 
88 | //   std::cout<< " heheh " << std::endl;
89 | //   std::cout << keep_out << std::endl;
90 | // }
91 | template <int unroll>
92 | static inline void cpu_unroll_for(unsigned long long *dst, const unsigned long long *src, int n) {
93 |   int nr_out = (n - n % unroll) / unroll;
94 |   for (int i = 0; i < nr_out; ++i) {
95 | #pragma unroll
96 |     for (int j = 0; j < unroll; ++j) {
97 |       *(dst++) |= *(src++);
98 |     }
99 |   }
100 |   for (int j = 0; j < n % unroll; ++j) {
101 |     *(dst++) |= *(src++);
102 |   }
103 | }
104 | 
105 | class HostDevice{
106 |  protected:
107 |   static const int nr_init_box = 8000;
108 |  public:
109 |   vector<unsigned long long> mask_host;
110 |   vector<unsigned long long> remv;
111 |   vector<int> keep_out;
112 | 
113 |   HostDevice(): mask_host(nr_init_box * (nr_init_box / threadsPerBlock)), remv(nr_init_box / threadsPerBlock), keep_out(nr_init_box){}
114 | };
115 | 
116 | 
117 | void NMSForward(
118 |     const float* dev_box, const int box_num,
119 |     unsigned long long * dev_mask, int* dev_output,
120 |     int* dev_output_num, float iou_threshold,
121 |     int max_output, const Eigen::GpuDevice& d){
122 | 
123 |   void* host_device_ptr = new HostDevice();
124 |   HostDevice* host_device = static_cast<HostDevice*>(host_device_ptr);
125 |   const int col_blocks = DIVUP(box_num, threadsPerBlock);
126 |   dim3 blocks(DIVUP(box_num, threadsPerBlock),
127 |               DIVUP(box_num, threadsPerBlock));
128 |   dim3 threads(threadsPerBlock);
129 |   nms_kernel<<<blocks, threads, 0, d.stream()>>>(box_num, iou_threshold, dev_box, dev_mask);
130 | 
131 |   vector<unsigned long long>& _mask_host = host_device->mask_host;
132 |   vector<unsigned long long>& _remv = host_device->remv;
133 |   vector<int>& _keep_out = host_device->keep_out;
134 |   int current_mask_host_size = box_num * col_blocks;
135 |   if(_mask_host.capacity() < current_mask_host_size){
136 |     _mask_host.reserve(current_mask_host_size);
137 |   }
138 |   CUDA_CHECK(cudaMemcpyAsync(&_mask_host[0], dev_mask, sizeof(unsigned long long) * box_num * col_blocks, cudaMemcpyDeviceToHost, d.stream()));
139 | 
140 |   if(_remv.capacity() < col_blocks){
141 |     _remv.reserve(col_blocks);
142 |   }
143 |   if(_keep_out.capacity() < box_num){
144 |     _keep_out.reserve(box_num);
145 |   }
146 |   if(max_output < 0){
147 |     max_output = box_num;
148 |   }
149 |   memset(&_remv[0], 0, sizeof(unsigned long long) * col_blocks);
150 |   CUDA_CHECK(cudaStreamSynchronize(d.stream()));
151 | 
152 |   int num_to_keep = 0;
153 |   for (int i = 0; i < box_num; i++) {
154 |     int nblock = i / threadsPerBlock;
155 |     int inblock = i % threadsPerBlock;
156 |     if (!(_remv[nblock] & (1ULL << inblock))) {
157 |       _keep_out[num_to_keep++] = i;
158 |       if(num_to_keep == max_output) break;
159 |       unsigned long long *p = &_mask_host[0] + i * col_blocks + nblock;
160 |       unsigned long long *q = &_remv[0] + nblock;
161 |       cpu_unroll_for<4>(q, p, col_blocks - nblock);
162 |     }
163 |   }
164 |   CUDA_CHECK(cudaMemcpyAsync(dev_output, &_keep_out[0], num_to_keep * sizeof(int), cudaMemcpyHostToDevice, d.stream()));
165 |   CUDA_CHECK(cudaMemcpyAsync(dev_output_num, &num_to_keep, sizeof(int), cudaMemcpyHostToDevice, d.stream()));
166 |   delete host_device;
167 | }
168 | 
169 | #endif // GOOGLE_CUDA
170 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/nms_op.h: --------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 | 
5 | #ifndef TENSORFLOW_USER_OPS_NMS_OP_H_
6 | #define TENSORFLOW_USER_OPS_NMS_OP_H_
7 | 
8 | #define EIGEN_USE_GPU
9 | 
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 | 
13 | namespace tensorflow {
14 | //#define DIVUP(m,n) (((m) - 1) / (n) + 1)
15 | // keepout and numout are the kernel output
16 | /*
17 | int* keep_out, int* num_out,
18 | */
19 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
20 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
21 | // void NMSForward(const int num_box,
22 | //                 const float* boxes,
23 | //                 const float nms_overlap_thresh,
24 | //                 const int max_out,
25 | //                 int* keep_out,
26 | //                 unsigned long long * mask_dev,
27 | //                 const Eigen::GpuDevice& d);
28 | 
29 | // }
30 | void NMSForward(const float* box_ptr, const int num_box,
31 |                 unsigned long long * mask_ptr,
32 |                 int* output_ptr,
33 |                 int* output_num_ptr,
34 |                 float iou_threshold,
35 |                 int max_output,
36 |                 const Eigen::GpuDevice& d);
37 | }
38 | // void NMSForward(const int num_box,
39 | //                 const float* boxes,
40 | //                 const float nms_overlap_thresh,
41 | //                 const int max_out,
42 | //                 int* keep_out,
43 | //                 unsigned long long * mask_dev,
44 | //                 const Eigen::GpuDevice& d);
45 | // }
46 | #endif // TENSORFLOW_CORE_KERNELS_NMS_OP_H_
47 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/nms_op.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 | 
4 | filename = osp.join(osp.dirname(__file__), 'nms.so')
5 | _nms_module = tf.load_op_library(filename)
6 | nms = _nms_module.nms
7 | 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_nms_dev/nms_test.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import pdb
4 | import os
5 | import nms_op
6 | 
7 | os.environ["CUDA_VISIBLE_DEVICES"] = '1'
8 | 
9 | # pdb.set_trace()
10 | 
11 | rois = tf.convert_to_tensor(
12 |     [[0, 1, 2, 3, 4], [1, 2, 3, 4, 4], [2, 3, 4, 5, 2]],
13 |     dtype=tf.float32)
14 | 
15 | nms_out = nms_op.nms(rois, 0.1, 3)
16 | 
17 | hehe = nms_out[0][0:nms_out[1][0]]
18 | # keep_out = nms_out[0][:nms_out[1]]
19 | 
20 | # hh= tf.transpose(hh, 
[0, 3, 1, 2]) 21 | # [y2, channels, argmax_position] = psalign_pooling_op.psalign_pool( 22 | # hh, rois, group_size=5, sample_height=2, 23 | # sample_width=2, spatial_scale=1.0) 24 | # [y2, channels] = psalign_pooling_op.psalign_pool( 25 | # hh, rois, 5, 2, 2, 1.0) 26 | 27 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 28 | 29 | print(sess.run(rois)) 30 | print("-------") 31 | print(sess.run( nms_out)) 32 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/make.sh: -------------------------------------------------------------------------------- 1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 2 | TF_LIB=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | nvcc -std=c++11 -c -o psalign_pooling_op.cu.o psalign_pooling_op_gpu.cu.cc \ 6 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 -I$TF_INC/external/nsync/public --expt-relaxed-constexpr 7 | 8 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o psalign_pooling.so psalign_pooling_op.cc \ 9 | psalign_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public 10 | 11 | # g++ -std=c++11 -shared -o psalign_pooling.so psalign_pooling_op.cc \ 12 | # psalign_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC -lcudart -L $CUDA_PATH/lib64 13 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/psalign_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'psalign_pooling.so') 5 | _psalign_pooling_module = tf.load_op_library(filename) 6 | psalign_pool = _psalign_pooling_module.ps_align_pool 7 | psalign_pool_grad = _psalign_pooling_module.ps_align_pool_grad -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/psalign_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_PSALIGNPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_PSALIGNPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 
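// Note (inferred from the signatures and call sites in this directory, not
// from upstream docs): the forward launcher pools each RoI into a
// pooled_height x pooled_width grid, reads every bin from its own group of
// input channels (position-sensitive, group_size x group_size groups), and
// takes the max over sample_height x sample_width bilinear samples.
// mapping_channel and argmax_position record, per output element, the source
// channel and the winning sample; the backward launcher uses them to route
// top_diff back into bottom_diff.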
18 | bool PSAlignPoolForwardLauncher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, 20 | const int channels, const int height, const int width, 21 | const int pooled_height, const int pooled_width, 22 | const int sample_height, const int sample_width, 23 | const float* bottom_rois, 24 | const int output_dim, const int group_size, float* top_data, 25 | int* mapping_channel, int* argmax_position, 26 | const Eigen::GpuDevice& d); 27 | 28 | bool PSAlignPoolBackwardLauncher(const float* top_diff, 29 | const int* mapping_channel, const int* argmax_position, 30 | const int num_rois, const float spatial_scale, const int channels, 31 | const int height, const int width, 32 | const int pooled_height, const int pooled_width, 33 | const int sample_height, const int sample_width, 34 | const int output_dim, 35 | float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d); 36 | } // namespace tensorflow 37 | 38 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 39 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/psalign_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import os 4 | import sys 5 | sys.path.insert(0, os.path.dirname(__file__)) 6 | import psalign_pooling_op 7 | import pdb 8 | 9 | 10 | # @ops.RegisterShape("PSROIPool") 11 | # def _psroi_pool_shape(op): 12 | # """Shape function for the PSROIPool op. 13 | 14 | # """ 15 | # dims_data = op.inputs[0].get_shape().as_list() 16 | # channels = dims_data[3] 17 | # dims_rois = op.inputs[1].get_shape().as_list() 18 | # num_rois = dims_rois[0] 19 | # output_dim = op.get_attr('output_dim') 20 | # group_size = op.get_attr('group_size') 21 | # pooled_height = group_size 22 | # pooled_width = group_size 23 | 24 | # output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim]) 25 | # return [output_shape, output_shape] 26 | 27 | @ops.RegisterGradient("PSAlignPool") 28 | def _psalign_pool_grad(op, grad, _1, _2): 29 | """The gradients for `psalign_pool`. 30 | Args: 31 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 32 | to find the inputs and outputs of the original op. 33 | grad: Gradient with respect to the output of the `roi_pool` op. 34 | Returns: 35 | Gradients with respect to the input of `zero_out`. 
36 | """ 37 | 38 | data = op.inputs[0] 39 | rois = op.inputs[1] 40 | mapping_channel = op.outputs[1] 41 | argmax_position = op.outputs[2] 42 | sample_height = op.get_attr('sample_height') 43 | sample_width = op.get_attr('sample_width') 44 | spatial_scale = op.get_attr('spatial_scale') 45 | 46 | # compute gradient 47 | #data_grad = psalign_pooling_op.psalign_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 48 | data_grad = psalign_pooling_op.psalign_pool_grad( 49 | data, rois, mapping_channel, argmax_position, grad, 50 | sample_height, sample_width, spatial_scale) 51 | 52 | return [data_grad, None] # List of one Tensor, since we have one input 53 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling/psalign_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import psalign_pooling_op 4 | import psalign_pooling_op_grad 5 | import pdb 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"] = '7' 8 | 9 | # pdb.set_trace() 10 | 11 | rois = tf.convert_to_tensor([[0, 0, 0, 4, 4], [0, 0, 0, 2, 4], [ 12 | 0, 0, 0, 4, 2]], dtype=tf.float32) 13 | hh = tf.convert_to_tensor(np.random.rand(1, 5, 5, 25*7), dtype=tf.float32) 14 | #hh= tf.transpose(hh, [0, 3, 1, 2]) 15 | 16 | 17 | [y2, channels, argmax_position] = psalign_pooling_op.psalign_pool(hh, rois, group_size=5, sample_height=2, sample_width=2, spatial_scale=1.0) 18 | # [y2, channels] = psalign_pooling_op.psalign_pool( 19 | # hh, rois, 5, 2, 2, 1.0) 20 | 21 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 22 | from IPython import embed 23 | 24 | print(sess.run(hh)) 25 | print("-------") 26 | print(sess.run(y2)) 27 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/make.sh: -------------------------------------------------------------------------------- 1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 2 | CUDA_PATH=/usr/local/cuda/ 3 | 4 | nvcc -std=c++11 -c -o psalign_pooling_op.cu.o psalign_pooling_op_gpu.cu.cc \ 5 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 -I$TF_INC/external/nsync/public --expt-relaxed-constexpr 6 | 7 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o psalign_pooling.so psalign_pooling_op.cc \ 8 | psalign_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public 9 | 10 | # g++ -std=c++11 -shared -o psalign_pooling.so psalign_pooling_op.cc \ 11 | # psalign_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC -lcudart -L $CUDA_PATH/lib64 12 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/psalign_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'psalign_pooling.so') 5 | _psalign_pooling_module = tf.load_op_library(filename) 6 | psalign_pool = _psalign_pooling_module.ps_align_pool 7 | psalign_pool_grad = _psalign_pooling_module.ps_align_pool_grad 
-------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/psalign_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_PSALIGNPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_PSALIGNPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 18 | bool PSAlignPoolForwardLauncher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, 20 | const int channels, const int height, const int width, 21 | const int pooled_height, const int pooled_width, 22 | const int sample_height, const int sample_width, 23 | const float* bottom_rois, 24 | const int output_dim, const int group_size, float* top_data, 25 | int* mapping_channel, int* argmax_position, 26 | const Eigen::GpuDevice& d); 27 | 28 | bool PSAlignPoolBackwardLauncher(const float* top_diff, 29 | const int* mapping_channel, const int* argmax_position, 30 | const int num_rois, const float spatial_scale, const int channels, 31 | const int height, const int width, 32 | const int pooled_height, const int pooled_width, 33 | const int sample_height, const int sample_width, 34 | const int output_dim, 35 | float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d); 36 | } // namespace tensorflow 37 | 38 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 39 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/psalign_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import os 4 | import sys 5 | sys.path.insert(0, os.path.dirname(__file__)) 6 | import psalign_pooling_op 7 | import pdb 8 | 9 | 10 | # @ops.RegisterShape("PSROIPool") 11 | # def _psroi_pool_shape(op): 12 | # """Shape function for the PSROIPool op. 13 | 14 | # """ 15 | # dims_data = op.inputs[0].get_shape().as_list() 16 | # channels = dims_data[3] 17 | # dims_rois = op.inputs[1].get_shape().as_list() 18 | # num_rois = dims_rois[0] 19 | # output_dim = op.get_attr('output_dim') 20 | # group_size = op.get_attr('group_size') 21 | # pooled_height = group_size 22 | # pooled_width = group_size 23 | 24 | # output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim]) 25 | # return [output_shape, output_shape] 26 | 27 | @ops.RegisterGradient("PSAlignPool") 28 | def _psalign_pool_grad(op, grad, _1, _2): 29 | """The gradients for `psalign_pool`. 30 | Args: 31 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 32 | to find the inputs and outputs of the original op. 33 | grad: Gradient with respect to the output of the `roi_pool` op. 34 | Returns: 35 | Gradients with respect to the input of `zero_out`. 
36 | """ 37 | 38 | data = op.inputs[0] 39 | rois = op.inputs[1] 40 | mapping_channel = op.outputs[1] 41 | argmax_position = op.outputs[2] 42 | sample_height = op.get_attr('sample_height') 43 | sample_width = op.get_attr('sample_width') 44 | spatial_scale = op.get_attr('spatial_scale') 45 | 46 | # compute gradient 47 | #data_grad = psalign_pooling_op.psalign_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 48 | data_grad = psalign_pooling_op.psalign_pool_grad( 49 | data, rois, mapping_channel, argmax_position, grad, 50 | sample_height, sample_width, spatial_scale) 51 | 52 | return [data_grad, None] # List of one Tensor, since we have one input 53 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psalign_pooling_ave/psalign_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import psalign_pooling_op 4 | import psalign_pooling_op_grad 5 | import pdb 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"] = '7' 8 | 9 | # pdb.set_trace() 10 | 11 | rois = tf.convert_to_tensor([[0, 0, 0, 4, 4], [0, 0, 0, 2, 4], [ 12 | 0, 0, 0, 4, 2]], dtype=tf.float32) 13 | hh = tf.convert_to_tensor(np.random.rand(1, 5, 5, 25*7), dtype=tf.float32) 14 | #hh= tf.transpose(hh, [0, 3, 1, 2]) 15 | 16 | 17 | [y2, channels, argmax_position] = psalign_pooling_op.psalign_pool(hh, rois, group_size=5, sample_height=2, sample_width=2, spatial_scale=1.0) 18 | # [y2, channels] = psalign_pooling_op.psalign_pool( 19 | # hh, rois, 5, 2, 2, 1.0) 20 | 21 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 22 | from IPython import embed 23 | 24 | print(sess.run(hh)) 25 | print("-------") 26 | print(sess.run(y2)) 27 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/make.sh: -------------------------------------------------------------------------------- 1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 2 | TF_LIB=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | nvcc -std=c++11 -c -o psroi_pooling_op.cu.o psroi_pooling_op_gpu.cu.cc \ 6 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 -I$TF_INC/external/nsync/public --expt-relaxed-constexpr 7 | 8 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o psroi_pooling.so psroi_pooling_op.cc \ 9 | psroi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public 10 | 11 | # g++ -std=c++11 -shared -o psroi_pooling.so psroi_pooling_op.cc \ 12 | # psroi_pooling_op.cu.o -I $TF_INC -D GOOGLE_CUDA=1 -fPIC -lcudart -L $CUDA_PATH/lib64 13 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/psroi_pooling_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'psroi_pooling.so') 5 | _psroi_pooling_module = tf.load_op_library(filename) 6 | psroi_pool = _psroi_pooling_module.psroi_pool 7 | psroi_pool_grad = 
_psroi_pooling_module.psroi_pool_grad -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/psroi_pooling_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_PSROIPOOLING_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 18 | bool PSROIPoolForwardLauncher( 19 | const float* bottom_data, const float spatial_scale, const int num_rois, 20 | const int channels, const int height, const int width, 21 | const int pooled_height, const int pooled_width, const float* bottom_rois, 22 | const int output_dim, const int group_size, float* top_data, 23 | int* mapping_channel, const Eigen::GpuDevice& d); 24 | 25 | bool PSROIPoolBackwardLauncher( 26 | const float* top_diff, const int* mapping_channel, const int batch_size, 27 | const int num_rois, const float spatial_scale, const int channels, 28 | const int height, const int width, const int pooled_height, 29 | const int pooled_width, const int output_dim, 30 | float* bottom_diff, const float* bottom_rois, const Eigen::GpuDevice& d); 31 | } // namespace tensorflow 32 | 33 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 34 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/psroi_pooling_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import os 4 | import sys 5 | sys.path.insert(0, os.path.dirname(__file__)) 6 | import psroi_pooling_op 7 | import pdb 8 | 9 | 10 | # @ops.RegisterShape("PSROIPool") 11 | # def _psroi_pool_shape(op): 12 | # """Shape function for the PSROIPool op. 13 | 14 | # """ 15 | # dims_data = op.inputs[0].get_shape().as_list() 16 | # channels = dims_data[3] 17 | # dims_rois = op.inputs[1].get_shape().as_list() 18 | # num_rois = dims_rois[0] 19 | # output_dim = op.get_attr('output_dim') 20 | # group_size = op.get_attr('group_size') 21 | # pooled_height = group_size 22 | # pooled_width = group_size 23 | 24 | # output_shape = tf.TensorShape([num_rois, pooled_height, pooled_width, output_dim]) 25 | # return [output_shape, output_shape] 26 | 27 | @ops.RegisterGradient("PSROIPool") 28 | def _psroi_pool_grad(op, grad, _): 29 | """The gradients for `PSROI_pool`. 30 | Args: 31 | op: The `roi_pool` `Operation` that we are differentiating, which we can use 32 | to find the inputs and outputs of the original op. 33 | grad: Gradient with respect to the output of the `roi_pool` op. 34 | Returns: 35 | Gradients with respect to the input of `zero_out`. 
36 | """ 37 | 38 | data = op.inputs[0] 39 | rois = op.inputs[1] 40 | mapping_channel = op.outputs[1] 41 | spatial_scale = op.get_attr('spatial_scale') 42 | 43 | # compute gradient 44 | #data_grad = psroi_pooling_op.psroi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale) 45 | data_grad = psroi_pooling_op.psroi_pool_grad( 46 | data, rois, mapping_channel, grad, spatial_scale) 47 | 48 | return [data_grad, None] # List of one Tensor, since we have one input 49 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_psroi_pooling/psroi_pooling_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import psroi_pooling_op 4 | import psroi_pooling_op_grad 5 | import pdb 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"] = '7' 8 | 9 | # pdb.set_trace() 10 | 11 | rois = tf.convert_to_tensor([[0, 0, 0, 4, 4], [0, 0, 0, 2, 4], [ 12 | 0, 0, 0, 4, 2]], dtype=tf.float32) 13 | hh = tf.convert_to_tensor(np.random.rand(1, 5, 5, 25*7), dtype=tf.float32) 14 | #hh= tf.transpose(hh, [0, 3, 1, 2]) 15 | # [y2, channels] = psroi_pooling_op.psroi_pool( 16 | # hh, rois, group_size=5, spatial_scale=1.0) 17 | [y2, channels] = psroi_pooling_op.psroi_pool( 18 | hh, rois, 5, 1.0) 19 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) 20 | from IPython import embed 21 | embed() 22 | print(sess.run(hh)) 23 | print("-------") 24 | print(sess.run(y2)) 25 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/Readme: -------------------------------------------------------------------------------- 1 | tf 的fm和caffe 顺序不一样,roi layer的实现效率有问题 2 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 4 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/lzm_roi_align_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import roi_align_op 4 | import roi_align_op_grad 5 | import tensorflow as tf 6 | import pdb 7 | 8 | 9 | def weight_variable(shape): 10 | initial = tf.truncated_normal(shape, stddev=0.1) 11 | return tf.Variable(initial) 12 | 13 | def conv2d(x, W): 14 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 15 | 16 | array = np.random.rand(32, 100, 100, 3) 17 | #array = np.arange(100) 18 | #array = array.reshape(1, 10, 10, 1) 19 | #array = np.ones((32, 100, 100, 3)) 20 | data = tf.convert_to_tensor(array, dtype=tf.float32) 21 | rois = tf.convert_to_tensor([[0, 0, 0, 6, 6]], dtype=tf.float32) 22 | 23 | W = weight_variable([3, 3, 3, 1]) 24 | h = conv2d(data, W) 25 | 26 | [y, argmax] = roi_align_op.roi_align(h, rois, 7, 7, 1, 1, 1.0) 27 | #pdb.set_trace() 28 | y_data = tf.convert_to_tensor(np.ones((1, 7, 7, 1)), dtype=tf.float32) 29 | print(y_data, y, argmax) 30 | 31 | # Minimize the mean squared errors. 32 | loss = tf.reduce_mean(tf.square(y - y_data)) 33 | optimizer = tf.train.GradientDescentOptimizer(0.5) 34 | train = optimizer.minimize(loss) 35 | 36 | init = tf.global_variables_initializer() 37 | 38 | # Launch the graph. 
39 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) 40 | sess.run(init) 41 | #a, b = sess.run([y, argmax]) 42 | #from IPython import embed; embed() 43 | #print(sess.run(y)) 44 | #pdb.set_trace() 45 | for step in range(2): 46 | sess.run(train) 47 | print(step, sess.run(W)) 48 | #print(sess.run(y)) 49 | 50 | #with tf.device('/gpu:0'): 51 | # result = module.roi_pool(data, rois, 1, 1, 1.0/1) 52 | # print result.eval() 53 | #with tf.device('/cpu:0'): 54 | # run(init) 55 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/make.sh: -------------------------------------------------------------------------------- 1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())') 2 | TF_LIB=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())') 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | nvcc -std=c++11 -c -o roi_align_op.cu.o roi_align_op_gpu.cu.cc \ 6 | -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 -I$TF_INC/external/nsync/public 7 | 8 | ## if you install tf using already-built binary, or gcc version 4.x, uncomment the two lines below 9 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o roi_align.so roi_align_op.cc \ 10 | roi_align_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public 11 | 12 | # for gcc5-built tf 13 | #g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=1 -o roi_align.so roi_align_op.cc \ 14 | # roi_align_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 15 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/roi_align_op.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import os.path as osp 3 | 4 | filename = osp.join(osp.dirname(__file__), 'roi_align.so') 5 | _roi_align_module = tf.load_op_library(filename) 6 | roi_align = _roi_align_module.roi_align 7 | roi_align_grad = _roi_align_module.roi_align_grad 8 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/roi_align_op_gpu.h: -------------------------------------------------------------------------------- 1 | #if !GOOGLE_CUDA 2 | #error This file must only be included when building with Cuda support 3 | #endif 4 | 5 | #ifndef TENSORFLOW_USER_OPS_ROIALIGN_OP_GPU_H_ 6 | #define TENSORFLOW_USER_OPS_ROIALIGN_OP_GPU_H_ 7 | 8 | #define EIGEN_USE_GPU 9 | 10 | #include "tensorflow/core/framework/tensor_types.h" 11 | #include "tensorflow/core/platform/types.h" 12 | 13 | namespace tensorflow { 14 | 15 | // Run the forward pass of max pooling, optionally writing the argmax indices to 16 | // the mask array, if it is not nullptr. If mask is passed in as nullptr, the 17 | // argmax indices are not written. 
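// Note (inferred, not from upstream docs): "Laucher" is a typo for
// "Launcher", but the spelling is preserved because the .cc and .cu.cc
// sources link against this exact symbol name. The forward pass appears to
// max-pool each of the pooled_height x pooled_width bins over
// sample_height x sample_width bilinearly interpolated samples, recording
// the winning sample in argmax_data so the backward pass can scatter
// top_diff back to bottom_diff.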
18 | bool RoiAlignForwardLaucher( 19 | const float* bottom_data, const float spatial_scale, 20 | const int num_rois, const int height, const int width, const int channels, 21 | const int pooled_height, const int pooled_width, 22 | const int sample_height, const int sample_width, const float* bottom_rois, 23 | float* top_data, int* argmax_data, const Eigen::GpuDevice& d); 24 | 25 | bool RoiAlignBackwardLaucher( 26 | const float* top_diff, const float spatial_scale, 27 | const int batch_size, const int num_rois, 28 | const int height, const int width, const int channels, 29 | const int pooled_height, const int pooled_width, 30 | const int sample_height, const int sample_width, const float* bottom_rois, 31 | float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d); 32 | 33 | } // namespace tensorflow 34 | 35 | #endif // TENSORFLOW_CORE_KERNELS_MAXPOOLING_OP_GPU_H_ 36 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/roi_align_op_grad.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.framework import ops 3 | import sys 4 | import os 5 | sys.path.insert(0, os.path.dirname(__file__)) 6 | import roi_align_op 7 | 8 | @ops.RegisterGradient("RoiAlign") 9 | def _roi_align_grad(op, grad, _): 10 | """The gradients for `roi_align`. 11 | Args: 12 | op: The `roi_align` `Operation` that we are differentiating, which we can use 13 | to find the inputs and outputs of the original op. 14 | grad: Gradient with respect to the output of the `roi_align` op. 15 | Returns: 16 | Gradients with respect to the input of `roi align`. 17 | """ 18 | data = op.inputs[0] 19 | rois = op.inputs[1] 20 | argmax = op.outputs[1] 21 | pooled_height = op.get_attr('pooled_height') 22 | pooled_width = op.get_attr('pooled_width') 23 | sample_height = op.get_attr('sample_height') 24 | sample_width = op.get_attr('sample_width') 25 | spatial_scale = op.get_attr('spatial_scale') 26 | 27 | # compute gradient 28 | data_grad = roi_align_op.roi_align_grad(data, rois, argmax, grad, 29 | pooled_height, pooled_width, sample_height, sample_width, spatial_scale) 30 | 31 | return [data_grad, None] # List of one Tensor, since we have one input 32 | -------------------------------------------------------------------------------- /lib/lib_kernel/lib_roi_align/roi_align_op_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import roi_align_op 4 | import roi_align_op_grad 5 | import tensorflow as tf 6 | import pdb 7 | import os 8 | os.environ["CUDA_VISIBLE_DEVICES"] = "7" 9 | 10 | 11 | def weight_variable(shape): 12 | initial = tf.truncated_normal(shape, stddev=0.1) 13 | return tf.Variable(initial) 14 | 15 | def conv2d(x, W): 16 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME') 17 | 18 | array = np.random.rand(32, 100, 100, 3) 19 | #array = np.ones((32, 100, 100, 3)) 20 | data = tf.convert_to_tensor(array, dtype=tf.float32) 21 | rois = tf.convert_to_tensor([[0, 10, 20, 30, 40], [31, 10, 20, 30, 40]], dtype=tf.float32) 22 | 23 | W = weight_variable([3, 3, 3, 1]) 24 | h = conv2d(data, W) 25 | 26 | [y, argmax] = roi_align_op.roi_align(data, rois, 6, 6, 2, 2, 1.0) 27 | #pdb.set_trace() 28 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32) 29 | print(y_data, y, argmax) 30 | 31 | # Minimize the mean squared errors. 
32 | #loss = tf.reduce_mean(tf.square(y - y_data))
33 | #optimizer = tf.train.GradientDescentOptimizer(0.5)
34 | #train = optimizer.minimize(loss)
35 | 
36 | init = tf.global_variables_initializer()
37 | 
38 | # Launch the graph.
39 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
40 | sess.run(init)
41 | a, b = sess.run([y, argmax])
42 | from IPython import embed; embed()
43 | print(sess.run(y))
44 | #pdb.set_trace()
45 | #for step in range(10):
46 | #    sess.run(train)
47 | #    print(step, sess.run(W))
48 | #    print(sess.run(y))
49 | 
50 | #with tf.device('/gpu:0'):
51 | #  result = module.roi_pool(data, rois, 1, 1, 1.0/1)
52 | #  print result.eval()
53 | #with tf.device('/cpu:0'):
54 | #  run(init)
55 | 
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/Readme:
--------------------------------------------------------------------------------
1 | The feature-map layout in tf differs from the caffe ordering, and the roi layer implementation here has efficiency problems.
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | 
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/make.sh:
--------------------------------------------------------------------------------
1 | TF_INC=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
2 | TF_LIB=$(python3 -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
3 | CUDA_PATH=/usr/local/cuda/
4 | 
5 | nvcc -std=c++11 -c -o roi_pooling_op.cu.o roi_pooling_op_gpu.cu.cc \
6 |     -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 -I$TF_INC/external/nsync/public
7 | 
8 | ## if you installed tf from a pre-built binary, or built it with gcc 4.x, use the _GLIBCXX_USE_CXX11_ABI=0 command below
9 | g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=0 -o roi_pooling.so roi_pooling_op.cc \
10 |     roi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64 -L$TF_LIB -ltensorflow_framework -I$TF_INC/external/nsync/public
11 | 
12 | # for tf built with gcc 5, use _GLIBCXX_USE_CXX11_ABI=1 instead
13 | #g++ -std=c++11 -shared -D_GLIBCXX_USE_CXX11_ABI=1 -o roi_pooling.so roi_pooling_op.cc \
14 | #    roi_pooling_op.cu.o -I $TF_INC -fPIC -lcudart -L $CUDA_PATH/lib64
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/roi_pooling_op.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import os.path as osp
3 | 
4 | filename = osp.join(osp.dirname(__file__), 'roi_pooling.so')
5 | _roi_pooling_module = tf.load_op_library(filename)
6 | roi_pool = _roi_pooling_module.roi_pool
7 | roi_pool_grad = _roi_pooling_module.roi_pool_grad
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/roi_pooling_op_gpu.h:
--------------------------------------------------------------------------------
1 | #if !GOOGLE_CUDA
2 | #error This file must only be included when building with Cuda support
3 | #endif
4 | 
5 | #ifndef TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
6 | #define TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
7 | 
8 | #define EIGEN_USE_GPU
9 | 
10 | #include "tensorflow/core/framework/tensor_types.h"
11 | #include "tensorflow/core/platform/types.h"
12 | 
13 | namespace tensorflow {
14 | 
15 | // Run the forward pass of RoI max pooling, optionally writing the argmax
16 | // indices to the argmax_data array, if it is not nullptr. If argmax_data is
17 | // passed in as nullptr, the argmax indices are not written.
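// Unlike RoiAlignForwardLaucher above, there are no sampling-rate parameters
// here: each pooled_height x pooled_width bin takes the max over the
// feature-map cells it covers, and argmax_data records the winning cell for
// the backward pass.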
18 | bool ROIPoolForwardLaucher(
19 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
20 |     const int width, const int channels, const int pooled_height,
21 |     const int pooled_width, const float* bottom_rois,
22 |     float* top_data, int* argmax_data, const Eigen::GpuDevice& d);
23 | 
24 | bool ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 |     const int height, const int width, const int channels, const int pooled_height,
26 |     const int pooled_width, const float* bottom_rois,
27 |     float* bottom_diff, const int* argmax_data, const Eigen::GpuDevice& d);
28 | 
29 | }  // namespace tensorflow
30 | 
31 | #endif  // TENSORFLOW_USER_OPS_ROIPOOLING_OP_GPU_H_
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/roi_pooling_op_grad.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.framework import ops
3 | import sys
4 | import os
5 | sys.path.insert(0, os.path.dirname(__file__))
6 | import roi_pooling_op
7 | 
8 | @ops.RegisterGradient("RoiPool")
9 | def _roi_pool_grad(op, grad, _):
10 |     """The gradients for `roi_pool`.
11 |     Args:
12 |         op: The `roi_pool` `Operation` that we are differentiating, which we can use
13 |             to find the inputs and outputs of the original op.
14 |         grad: Gradient with respect to the output of the `roi_pool` op.
15 |     Returns:
16 |         Gradients with respect to the input of `roi_pool`.
17 |     """
18 |     data = op.inputs[0]
19 |     rois = op.inputs[1]
20 |     argmax = op.outputs[1]
21 |     pooled_height = op.get_attr('pooled_height')
22 |     pooled_width = op.get_attr('pooled_width')
23 |     spatial_scale = op.get_attr('spatial_scale')
24 | 
25 |     # compute gradient
26 |     data_grad = roi_pooling_op.roi_pool_grad(data, rois, argmax, grad, pooled_height, pooled_width, spatial_scale)
27 | 
28 |     return [data_grad, None]  # gradient for `data` only; `rois` gets no gradient
--------------------------------------------------------------------------------
/lib/lib_kernel/lib_roi_pooling/roi_pooling_op_test.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import roi_pooling_op
4 | import roi_pooling_op_grad
5 | 
6 | import pdb
7 | 
8 | 
9 | def weight_variable(shape):
10 |     initial = tf.truncated_normal(shape, stddev=0.1)
11 |     return tf.Variable(initial)
12 | 
13 | def conv2d(x, W):
14 |     return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
15 | 
16 | array = np.random.rand(32, 100, 100, 3)
17 | data = tf.convert_to_tensor(array, dtype=tf.float32)
18 | rois = tf.convert_to_tensor([[0, 10, 10, 20, 20], [31, 30, 30, 40, 40]], dtype=tf.float32)
19 | 
20 | W = weight_variable([3, 3, 3, 1])
21 | h = conv2d(data, W)
22 | 
23 | [y, argmax] = roi_pooling_op.roi_pool(h, rois, 6, 6, 1.0/3)
24 | #pdb.set_trace()
25 | y_data = tf.convert_to_tensor(np.ones((2, 6, 6, 1)), dtype=tf.float32)
26 | print(y_data, y, argmax)
27 | 
28 | # Minimize the mean squared errors.
29 | loss = tf.reduce_mean(tf.square(y - y_data))
30 | optimizer = tf.train.GradientDescentOptimizer(0.5)
31 | train = optimizer.minimize(loss)
32 | 
33 | init = tf.global_variables_initializer()
34 | 
35 | # Launch the graph.
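# log_device_placement=True makes the session log which device each op runs on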
36 | sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
37 | sess.run(init)
38 | #pdb.set_trace()
39 | for step in range(10):
40 |     sess.run(train)
41 |     print(step, sess.run(W))
42 |     print(sess.run(y))
43 | 
44 | #with tf.device('/gpu:0'):
45 | #  result = module.roi_pool(data, rois, 1, 1, 1.0/1)
46 | #  print result.eval()
47 | #with tf.device('/cpu:0'):
48 | #  run(init)
49 | 
--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
1 | pushd ./utils/py_faster_rcnn_utils
2 | make;
3 | popd
4 | 
5 | pushd lib_kernel/lib_psroi_pooling
6 | sh make.sh
7 | popd
8 | 
9 | pushd lib_kernel/lib_roi_pooling
10 | sh make.sh
11 | popd
12 | 
13 | pushd lib_kernel/lib_roi_align
14 | sh make.sh
15 | popd
16 | 
17 | pushd lib_kernel/lib_psalign_pooling
18 | sh make.sh
19 | popd
20 | 
21 | pushd lib_kernel/lib_nms_dev
22 | sh make.sh
23 | popd
24 | 
25 | pushd datasets_odgt/lib_coco/PythonAPI
26 | make install
27 | popd
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | 
--------------------------------------------------------------------------------
/lib/utils/dpflow/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zengarden/light_head_rcnn/790f94e4e1481fbc403b101a763d4a9df56ee32a/lib/utils/dpflow/__init__.py
--------------------------------------------------------------------------------
/lib/utils/dpflow/dpflow.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | import zmq
8 | import multiprocessing as mp
9 | from config import config
10 | from utils.dpflow.serialize import loads, dumps
11 | import dataset
12 | 
13 | def data_sender(id, name, *args):
14 |     context = zmq.Context()
15 |     sender = context.socket(zmq.PUSH)
16 |     sender.connect('ipc://@{}'.format(name))
17 | 
18 |     print('start data provider {}-{}'.format(name, id))
19 |     while True:
20 |         data_iter = dataset.train_dataset(id + 1)
21 |         for msg in data_iter:
22 |             # print(id)
23 |             sender.send(dumps([id, msg]))
24 | 
25 | 
26 | def provider(nr_proc, name, *args):
27 |     proc_ids = [i for i in range(nr_proc)]
28 | 
29 |     procs = []
30 |     for i in range(nr_proc):
31 |         w = mp.Process(
32 |             target=data_sender,
33 |             args=(proc_ids[i], name, *args))
34 |         w.daemon = True
35 |         procs.append(w)
36 | 
37 |     for p in procs:
38 |         p.start()
39 | 
40 | 
41 | # , dataset.train_dataset()
42 | 
43 | def receiver(name):
44 |     context = zmq.Context()
45 |     receiver = context.socket(zmq.PULL)
46 |     receiver.bind('ipc://@{}'.format(name))
47 | 
48 |     while True:
49 |         id, msg = loads(receiver.recv())
50 |         # print(id, end='')
51 |         yield msg
52 | 
53 | 
54 | if __name__ == "__main__":
55 |     from IPython import embed
56 |     import time
57 |     provider(config.nr_dpflows, config.program_name)
58 |     dataiter = receiver(config.program_name)
59 |     start = time.clock()
60 |     time.sleep(10)
61 |     for i in range(1000):
62 |         hehe = next(dataiter)
63 |     end = time.clock()
64 |     print("read: %f s" % (end - start))
--------------------------------------------------------------------------------
/lib/utils/dpflow/prefetching_iter.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | import threading
8 | 
9 | class PrefetchingIter:
10 |     '''
11 |     iters: a data iterator; each next(iters) must yield a blobs dict with an 'is_valid' key. num_gpu: number of valid blobs to collect per step.
12 |     '''
13 | 
14 |     def __init__(self, iters, num_gpu):
15 |         self.iters = iters
16 |         self.n_iter = 1
17 |         self.data_ready = [threading.Event() for _ in range(self.n_iter)]
18 |         self.data_taken = [threading.Event() for _ in range(self.n_iter)]
19 |         for e in self.data_taken:
20 |             e.set()
21 |         self.started = True
22 |         self.current_batch = [None for _ in range(self.n_iter)]
23 |         self.next_batch = [None for _ in range(self.n_iter)]
24 | 
25 |         def prefetch_func(self, i):
26 |             """Thread entry; closes over iters and num_gpu"""
27 |             while True:
28 |                 self.data_taken[i].wait()
29 |                 if not self.started:
30 |                     break
31 |                 try:
32 |                     blobs_list = []
33 |                     cnt = 0
34 |                     while cnt < num_gpu:
35 |                         blobs = next(iters)
36 |                         if blobs['is_valid']:
37 |                             cnt += 1
38 |                             blobs_list.append(blobs)
39 | 
40 |                     #for gpu_id in range(num_gpu):
41 |                     #    blobs = next(iters)
42 |                     #    blobs_list.append(blobs)
43 |                     self.next_batch[i] = blobs_list
44 |                 except StopIteration:
45 |                     self.next_batch[i] = None
46 |                 self.data_taken[i].clear()
47 |                 self.data_ready[i].set()
48 | 
49 |         self.prefetch_threads = [
50 |             threading.Thread(target=prefetch_func, args=[self, i]) \
51 |             for i in range(self.n_iter)]
52 |         for thread in self.prefetch_threads:
53 |             thread.setDaemon(True)
54 |             thread.start()
55 | 
56 |     def __del__(self):
57 |         self.started = False
58 |         for e in self.data_taken:
59 |             e.set()
60 |         for thread in self.prefetch_threads:
61 |             thread.join()
62 | 
63 |     def iter_next(self):
64 |         for e in self.data_ready:
65 |             e.wait()
66 |         if self.next_batch[0] is None:
67 |             return False
68 |         else:
69 |             self.current_batch = self.next_batch[0]
70 |             for e in self.data_ready:
71 |                 e.clear()
72 |             for e in self.data_taken:
73 |                 e.set()
74 |             return True
75 | 
76 |     def forward(self):
77 |         """Get blobs and copy them into this layer's top blob vector."""
78 |         if self.iter_next():
79 |             return self.current_batch
80 |         else:
81 |             raise StopIteration
--------------------------------------------------------------------------------
/lib/utils/dpflow/serialize.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | # Copied from Tensorpack
4 | 
5 | import sys
6 | import msgpack
7 | import msgpack_numpy
8 | msgpack_numpy.patch()
9 | 
10 | # https://github.com/apache/arrow/pull/1223#issuecomment-359895666
11 | old_mod = sys.modules.get('torch', None)
12 | sys.modules['torch'] = None
13 | try:
14 |     import pyarrow as pa
15 | except ImportError:
16 |     pa = None
17 | if old_mod is not None:
18 |     sys.modules['torch'] = old_mod
19 | else:
20 |     del sys.modules['torch']
21 | 
22 | 
23 | __all__ = ['loads', 'dumps']
24 | 
25 | 
26 | def dumps_msgpack(obj):
27 |     """
28 |     Serialize an object.
29 |     Returns:
30 |         Implementation-dependent bytes-like object
31 |     """
32 |     return msgpack.dumps(obj, use_bin_type=True)
33 | 
34 | 
35 | def loads_msgpack(buf):
36 |     """
37 |     Args:
38 |         buf: the output of `dumps`.
39 |     """
40 |     return msgpack.loads(buf, raw=False)
41 | 
42 | 
43 | def dumps_pyarrow(obj):
44 |     """
45 |     Serialize an object.
46 | 
47 |     Returns:
48 |         Implementation-dependent bytes-like object
49 |     """
50 |     return pa.serialize(obj).to_buffer()
51 | 
52 | 
53 | def loads_pyarrow(buf):
54 |     """
55 |     Args:
56 |         buf: the output of `dumps`.
57 | """ 58 | return pa.deserialize(buf) 59 | 60 | 61 | if pa is None: 62 | loads = loads_msgpack 63 | dumps = dumps_msgpack 64 | else: 65 | loads = loads_pyarrow 66 | dumps = dumps_pyarrow -------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python3 setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf */*.pyc 6 | rm -rf */*.so 7 | -------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zengarden/light_head_rcnn/790f94e4e1481fbc403b101a763d4a9df56ee32a/lib/utils/py_faster_rcnn_utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | import cv2 15 | 16 | 17 | def im_list_to_blob(ims): 18 | """Convert a list of images into a network input. 19 | 20 | Assumes images are already prepared (means subtracted, BGR order, ...). 21 | """ 22 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 23 | num_images = len(ims) 24 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 25 | dtype=np.float32) 26 | for i in range(num_images): 27 | im = ims[i] 28 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 29 | 30 | return blob 31 | 32 | 33 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 34 | """Mean subtract and scale an image for use in a blob.""" 35 | im = im.astype(np.float32, copy=False) 36 | im -= pixel_means 37 | im_shape = im.shape 38 | im_size_min = np.min(im_shape[0:2]) 39 | im_size_max = np.max(im_shape[0:2]) 40 | im_scale = float(target_size) / float(im_size_min) 41 | # Prevent the biggest axis from being more than MAX_SIZE 42 | if np.round(im_scale * im_size_max) > max_size: 43 | im_scale = float(max_size) / float(im_size_max) 44 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 45 | interpolation=cv2.INTER_LINEAR) 46 | 47 | return im, im_scale 48 | -------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | import math 13 | from config import cfg 14 | 15 | 16 | def get_boxes_grid(image_height, image_width): 17 | """ 18 | Return the boxes on image grid. 
19 | """ 20 | 21 | # height and width of the heatmap 22 | if cfg.NET_NAME == 'CaffeNet': 23 | height = np.floor((image_height * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 24 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 25 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 26 | 27 | width = np.floor((image_width * max(cfg.TRAIN.SCALES) - 1) / 4.0 + 1) 28 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 29 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 30 | elif cfg.NET_NAME == 'VGGnet': 31 | height = np.floor(image_height * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 32 | height = np.floor(height / 2.0 + 0.5) 33 | height = np.floor(height / 2.0 + 0.5) 34 | height = np.floor(height / 2.0 + 0.5) 35 | 36 | width = np.floor(image_width * max(cfg.TRAIN.SCALES) / 2.0 + 0.5) 37 | width = np.floor(width / 2.0 + 0.5) 38 | width = np.floor(width / 2.0 + 0.5) 39 | width = np.floor(width / 2.0 + 0.5) 40 | else: 41 | assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 42 | 43 | # compute the grid box centers 44 | h = np.arange(height) 45 | w = np.arange(width) 46 | y, x = np.meshgrid(h, w, indexing='ij') 47 | centers = np.dstack((x, y)) 48 | centers = np.reshape(centers, (-1, 2)) 49 | num = centers.shape[0] 50 | 51 | # compute width and height of grid box 52 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 53 | aspect = cfg.TRAIN.ASPECTS # height / width 54 | num_aspect = len(aspect) 55 | widths = np.zeros((1, num_aspect), dtype=np.float32) 56 | heights = np.zeros((1, num_aspect), dtype=np.float32) 57 | for i in range(num_aspect): 58 | widths[0, i] = math.sqrt(area / aspect[i]) 59 | heights[0, i] = widths[0, i] * aspect[i] 60 | 61 | # construct grid boxes 62 | centers = np.repeat(centers, num_aspect, axis=0) 63 | widths = np.tile(widths, num).transpose() 64 | heights = np.tile(heights, num).transpose() 65 | 66 | x1 = np.reshape(centers[:, 0], (-1, 1)) - widths * 0.5 67 | x2 = np.reshape(centers[:, 0], (-1, 1)) + widths * 0.5 68 | y1 = np.reshape(centers[:, 1], (-1, 1)) - heights * 0.5 69 | y2 = np.reshape(centers[:, 1], (-1, 1)) + heights * 0.5 70 | 71 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 72 | 73 | return boxes_grid, centers[:, 0], centers[:, 1] 74 | -------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | 
-------------------------------------------------------------------------------- /lib/utils/py_faster_rcnn_utils/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 
99 |             continue
100 |         keep.append(i)
101 |         ix1 = x1[i]
102 |         iy1 = y1[i]
103 |         ix2 = x2[i]
104 |         iy2 = y2[i]
105 |         iarea = areas[i]
106 |         for _j in range(_i + 1, ndets):
107 |             j = order[_j]
108 |             if suppressed[j] == 1:
109 |                 continue
110 |             xx1 = max(ix1, x1[j])
111 |             yy1 = max(iy1, y1[j])
112 |             xx2 = min(ix2, x2[j])
113 |             yy2 = min(iy2, y2[j])
114 |             w = max(0.0, xx2 - xx1 + 1)
115 |             h = max(0.0, yy2 - yy1 + 1)
116 |             inter = w * h
117 |             ovr = inter / (iarea + areas[j] - inter)
118 |             ovr1 = inter / iarea
119 |             ovr2 = inter / areas[j]
120 |             if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 |                 suppressed[j] = 1
122 | 
123 |     return keep
124 | 
--------------------------------------------------------------------------------
/lib/utils/py_faster_rcnn_utils/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 | 
15 | def find_in_path(name, path):
16 |     "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 |     for dir in path.split(os.pathsep):
19 |         binpath = pjoin(dir, name)
20 |         if os.path.exists(binpath):
21 |             return os.path.abspath(binpath)
22 |     return None
23 | 
24 | def locate_cuda():
25 |     """Locate the CUDA environment on the system
26 | 
27 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 |     and values giving the absolute path to each directory.
29 | 
30 |     Starts by looking for the CUDAHOME env variable. If not found, everything
31 |     is based on finding 'nvcc' in the PATH.
32 |     """
33 | 
34 |     # first check if the CUDAHOME env variable is in use
35 |     if 'CUDAHOME' in os.environ:
36 |         home = os.environ['CUDAHOME']
37 |         nvcc = pjoin(home, 'bin', 'nvcc')
38 |     else:
39 |         # otherwise, search the PATH for NVCC
40 |         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 |         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 |         if nvcc is None:
43 |             raise EnvironmentError('The nvcc binary could not be '
44 |                 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 |         home = os.path.dirname(os.path.dirname(nvcc))
46 | 
47 |     cudaconfig = {'home':home, 'nvcc':nvcc,
48 |                   'include': pjoin(home, 'include'),
49 |                   'lib64': pjoin(home, 'lib64')}
50 |     for k, v in cudaconfig.items():
51 |         if not os.path.exists(v):
52 |             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 | 
54 |     return cudaconfig
55 | CUDA = locate_cuda()
56 | 
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 |     numpy_include = np.get_include()
60 | except AttributeError:
61 |     numpy_include = np.get_numpy_include()
62 | 
63 | def customize_compiler_for_nvcc(self):
64 |     """inject deep into distutils to customize how the dispatch
65 |     to gcc/nvcc works.
66 | 
67 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
68 |     injected in, and still have the right customizations (i.e.
69 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note: it's kind of like a weird functional
71 |     subclassing going on."""
72 | 
73 |     # tell the compiler it can process .cu files
74 |     self.src_extensions.append('.cu')
75 | 
76 |     # save references to the default compiler_so and _compile methods
77 |     default_compiler_so = self.compiler_so
78 |     super = self._compile
79 | 
80 |     # now redefine the _compile method. This gets executed for each
81 |     # object but distutils doesn't have the ability to change compilers
82 |     # based on source extension: we add it.
83 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 |         print(extra_postargs)
85 |         if os.path.splitext(src)[1] == '.cu':
86 |             # use cuda for .cu files
87 |             self.set_executable('compiler_so', CUDA['nvcc'])
88 |             # use only a subset of the extra_postargs, which are 1-1 translated
89 |             # from the extra_compile_args in the Extension class
90 |             postargs = extra_postargs['nvcc']
91 |         else:
92 |             postargs = extra_postargs['gcc']
93 | 
94 |         super(obj, src, ext, cc_args, postargs, pp_opts)
95 |         # reset the default compiler_so, which we might have changed for cuda
96 |         self.compiler_so = default_compiler_so
97 | 
98 |     # inject our redefined _compile method into the class
99 |     self._compile = _compile
100 | 
101 | # run the customize_compiler
102 | class custom_build_ext(build_ext):
103 |     def build_extensions(self):
104 |         customize_compiler_for_nvcc(self.compiler)
105 |         build_ext.build_extensions(self)
106 | 
107 | ext_modules = [
108 |     Extension(
109 |         "cython_bbox",
110 |         ["bbox.pyx"],
111 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
112 |         include_dirs = [numpy_include]
113 |     ),
114 |     Extension(
115 |         "cython_nms",
116 |         ["nms.pyx"],
117 |         extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
118 |         include_dirs = [numpy_include]
119 |     )
120 |     # Extension(
121 |     #     "cpu_nms",
122 |     #     ["cpu_nms.pyx"],
123 |     #     extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
124 |     #     include_dirs = [numpy_include]
125 |     # )
126 | ]
127 | 
128 | setup(
129 |     name='tf_faster_rcnn',
130 |     ext_modules=ext_modules,
131 |     # inject our custom trigger
132 |     cmdclass={'build_ext': custom_build_ext},
133 | )
--------------------------------------------------------------------------------
/lib/utils/py_faster_rcnn_utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | class Timer(object):
11 |     """A simple timer."""
12 |     def __init__(self):
13 |         self.total_time = 0.
14 |         self.calls = 0
15 |         self.start_time = 0.
16 |         self.diff = 0.
17 |         self.average_time = 0.
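        # number of initial timed calls that toc() excludes from the running average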
18 |         self.warm_up = 0
19 | 
20 |     def tic(self):
21 |         # using time.time instead of time.clock because time.clock
22 |         # does not normalize for multithreading
23 |         self.start_time = time.time()
24 | 
25 |     def toc(self, average=True):
26 |         self.diff = time.time() - self.start_time
27 |         if self.warm_up < 100:
28 |             self.warm_up += 1
29 |             return self.diff
30 |         else:
31 |             self.total_time += self.diff
32 |             self.calls += 1
33 |             self.average_time = self.total_time / self.calls
34 | 
35 |             if average:
36 |                 return self.average_time
37 |             else:
38 |                 return self.diff
39 | 
--------------------------------------------------------------------------------
/lib/utils/py_utils/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | import logging, os
8 | 
9 | 
10 | class QuickLogger:
11 |     def __init__(self, log_dir, log_name='train_logs.txt'):
12 |         # set log
13 |         self.logger = logging.getLogger()
14 |         self.logger.setLevel(logging.INFO)
15 |         log_file = os.path.join(log_dir, log_name)
16 |         if not os.path.exists(log_dir):
17 |             os.makedirs(log_dir)
18 |         formatter = logging.Formatter("%(message)s")
19 | 
20 |         file_log = logging.FileHandler(log_file, mode='w')
21 |         file_log.setLevel(logging.INFO)
22 |         file_log.setFormatter(formatter)
23 |         # console_log = logging.StreamHandler()
24 |         # console_log.setLevel(logging.INFO)
25 |         # console_log.setFormatter(formatter)
26 |         # self.logger.addHandler(console_log)
27 |         self.logger.addHandler(file_log)
28 | 
29 |     def get_logger(self):
30 |         return self.logger
--------------------------------------------------------------------------------
/lib/utils/py_utils/logger.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | import logging
7 | import os
8 | 
9 | 
10 | class QuickLogger:
11 |     def __init__(self, log_dir, log_name='train_logs.txt'):
12 |         # set log
13 |         self.logger = logging.getLogger()
14 |         self.logger.setLevel(logging.INFO)
15 |         log_file = os.path.join(log_dir, log_name)
16 |         if not os.path.exists(log_dir):
17 |             os.makedirs(log_dir)
18 |         formatter = logging.Formatter("%(message)s")
19 | 
20 |         file_log = logging.FileHandler(log_file, mode='w')
21 |         file_log.setLevel(logging.INFO)
22 |         file_log.setFormatter(formatter)
23 |         # console_log = logging.StreamHandler()
24 |         # console_log.setLevel(logging.INFO)
25 |         # console_log.setFormatter(formatter)
26 |         # self.logger.addHandler(console_log)
27 |         self.logger.addHandler(file_log)
28 | 
29 |     def get_logger(self):
30 |         return self.logger
--------------------------------------------------------------------------------
/lib/utils/py_utils/misc.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | import os
8 | 
9 | 
10 | def ensure_dir(path):
11 |     """create directories if *path* does not exist"""
12 |     if not os.path.isdir(path):
13 |         os.makedirs(path)
14 | 
15 | 
16 | def parse_devices(gpu_ids):
17 |     if '-' in gpu_ids:
18 |         gpus = gpu_ids.split('-')
19 |         gpus[0] = int(gpus[0])
20 |         gpus[1] = int(gpus[1]) + 1
21 |         parsed_ids = ','.join(map(lambda x: str(x), list(range(*gpus))))
22 |         return parsed_ids
23 |     else:
24 |         return gpu_ids
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     gpu_ids = "0-7"
29 |     print(parse_devices(gpu_ids))
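    # an explicit comma-separated list is returned unchanged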
30 |     gpu_ids = "0,1,2,3,4,5,6,7"
31 |     print(parse_devices(gpu_ids))
--------------------------------------------------------------------------------
/lib/utils/tf_utils/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | if __name__ == '__main__':
8 |     pass
--------------------------------------------------------------------------------
/lib/utils/tf_utils/basemodel/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
--------------------------------------------------------------------------------
/lib/utils/tf_utils/debug_opr.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | from IPython import embed
8 | import tensorflow as tf
9 | import numpy as np
10 | 
11 | def _debug_single(x):
12 |     print(x.shape)
13 |     np.save('/tmp/x', x)
14 |     embed()
15 |     return True
16 | 
17 | 
18 | def _debug_two(x, y):
19 |     embed()
20 |     return True
21 | 
22 | 
23 | def _debug_three(x, y, z):
24 |     embed()
25 |     return True
26 | 
27 | 
28 | def _debug_four(x, y, z, u):
29 |     embed()
30 |     return True
--------------------------------------------------------------------------------
/lib/utils/tf_utils/lr_policy.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | from abc import ABCMeta, abstractmethod
7 | 
8 | 
9 | class BaseLR(metaclass=ABCMeta):
10 | 
11 | 
12 |     @abstractmethod
13 |     def get_lr(self, cur_iter): pass
14 | 
15 | 
16 | class MultiStageLR(BaseLR):
17 |     def __init__(self, lr_stages):
18 |         assert type(lr_stages) in [list, tuple] and len(lr_stages[0]) == 2, \
19 |             'lr_stages must be list or tuple, with [iters, lr] format'
20 |         self._lr_stages = lr_stages
21 | 
22 |     def get_lr(self, epoch):
23 |         for it_lr in self._lr_stages:
24 |             if epoch < it_lr[0]:
25 |                 return it_lr[1]
26 | 
27 | 
28 | class LinearIncreaseLR(BaseLR):
29 |     def __init__(self, start_lr, end_lr, warm_iters):
30 |         self._start_lr = start_lr
31 |         self._end_lr = end_lr
32 |         self._warm_iters = warm_iters
33 |         self._delta_lr = (end_lr - start_lr) / warm_iters
34 | 
35 |     def get_lr(self, cur_epoch):
36 |         return self._start_lr + cur_epoch * self._delta_lr
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     lr = LinearIncreaseLR(0.00001, 0.1, 5)
41 |     from IPython import embed;
42 | 
43 |     embed()
44 |     print(lr._delta_lr)
45 |     for i in range(5):
46 |         print(lr.get_lr(i))
--------------------------------------------------------------------------------
/lib/utils/tf_utils/model_helper.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | 
7 | from tensorflow.python import pywrap_tensorflow
8 | import tensorflow as tf
9 | 
10 | 
11 | def get_variables_in_checkpoint_file(file_name):
12 |     try:
13 |         reader = pywrap_tensorflow.NewCheckpointReader(file_name)
14 |         var_to_shape_map = reader.get_variable_to_shape_map()
15 |         return var_to_shape_map
16 |     except Exception as e:  # pylint: disable=broad-except
17 |         print(str(e))
18 |         if "corrupted compressed block contents" in str(e):
19 |             print(
20 |                 "It's likely that your checkpoint file has been compressed "
21 |                 "with SNAPPY.")
22 | 
23 | 
24 | 
25 | 
26 | def average_gradients(tower_grads):
27 |     """Calculate the average gradient for each shared variable across all towers.
28 |     Note that this function provides a synchronization point across all towers.
29 |     Args:
30 |         tower_grads: List of lists of (gradient, variable) tuples. The outer list
31 |         is over individual gradients. The inner list is over the gradient
32 |         calculation for each tower.
33 |     Returns:
34 |         List of pairs of (gradient, variable) where the gradient has been averaged
35 |         across all towers.
36 |     """
37 |     average_grads = []
38 |     for grad_and_vars in zip(*tower_grads):
39 |         # Note that each grad_and_vars looks like the following:
40 |         # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
41 |         grads = []
42 |         for g, _ in grad_and_vars:
43 |             # Add 0 dimension to the gradients to represent the tower.
44 |             expanded_g = tf.expand_dims(g, 0)
45 | 
46 |             # Append on a 'tower' dimension which we will average over below.
47 |             grads.append(expanded_g)
48 | 
49 |         # Average over the 'tower' dimension.
50 |         grad = tf.concat(axis=0, values=grads)
51 |         grad = tf.reduce_mean(grad, 0)
52 | 
53 |         # Keep in mind that the Variables are redundant because they are shared
54 |         # across towers. So .. we will just return the first tower's pointer to
55 |         # the Variable.
56 |         v = grad_and_vars[0][1]
57 |         grad_and_var = (grad, v)
58 |         average_grads.append(grad_and_var)
59 |     return average_grads
60 | 
61 | 
62 | def sum_gradients(tower_grads):
63 |     """Calculate the summed gradient for each shared variable across all towers.
64 |     Note that this function provides a synchronization point across all towers.
65 |     Args:
66 |         tower_grads: List of lists of (gradient, variable) tuples. The outer list
67 |         is over individual gradients. The inner list is over the gradient
68 |         calculation for each tower.
69 |     Returns:
70 |         List of pairs of (gradient, variable) where the gradient has been summed
71 |         across all towers.
72 |     """
73 |     sum_grads = []
74 |     for grad_and_vars in zip(*tower_grads):
75 |         # Note that each grad_and_vars looks like the following:
76 |         # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
77 |         grads = []
78 |         for g, _ in grad_and_vars:
79 |             # Add 0 dimension to the gradients to represent the tower.
80 |             expanded_g = tf.expand_dims(g, 0)
81 | 
82 |             # Append on a 'tower' dimension which we will sum over below.
83 |             grads.append(expanded_g)
84 | 
85 |         # Sum over the 'tower' dimension.
86 |         grad = tf.concat(axis=0, values=grads)
87 |         grad = tf.reduce_sum(grad, 0)
88 | 
89 |         # Keep in mind that the Variables are redundant because they are shared
90 |         # across towers. So .. we will just return the first tower's pointer to
91 |         # the Variable.
92 |         v = grad_and_vars[0][1]
93 |         grad_and_var = (grad, v)
94 |         sum_grads.append(grad_and_var)
95 |     return sum_grads
--------------------------------------------------------------------------------
/lib/utils/tf_utils/model_parallel.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: jemmy li
4 | @contact: zengarden2009@gmail.com
5 | """
6 | import tensorflow as tf
7 | 
8 | def average_gradients(tower_grads):
9 |     """Calculate the average gradient for each shared variable across all towers.
10 |     Note that this function provides a synchronization point across all towers.
11 |     Args:
12 |         tower_grads: List of lists of (gradient, variable) tuples. The outer list
13 |         is over individual gradients. The inner list is over the gradient
14 |         calculation for each tower.
15 |     Returns:
16 |         List of pairs of (gradient, variable) where the gradient has been averaged
17 |         across all towers.
18 |     """
19 |     average_grads = []
20 |     for grad_and_vars in zip(*tower_grads):
21 |         # Note that each grad_and_vars looks like the following:
22 |         # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
23 |         grads = []
24 |         for g, _ in grad_and_vars:
25 |             # Add 0 dimension to the gradients to represent the tower.
26 |             expanded_g = tf.expand_dims(g, 0)
27 | 
28 |             # Append on a 'tower' dimension which we will average over below.
29 |             grads.append(expanded_g)
30 | 
31 |         # Average over the 'tower' dimension.
32 |         grad = tf.concat(axis=0, values=grads)
33 |         grad = tf.reduce_mean(grad, 0)
34 | 
35 |         # Keep in mind that the Variables are redundant because they are shared
36 |         # across towers. So .. we will just return the first tower's pointer to
37 |         # the Variable.
38 |         v = grad_and_vars[0][1]
39 |         grad_and_var = (grad, v)
40 |         average_grads.append(grad_and_var)
41 |     return average_grads
42 | 
43 | def sum_gradients(tower_grads):
44 |     """Calculate the summed gradient for each shared variable across all towers.
45 |     Note that this function provides a synchronization point across all towers.
46 |     Args:
47 |         tower_grads: List of lists of (gradient, variable) tuples. The outer list
48 |         is over individual gradients. The inner list is over the gradient
49 |         calculation for each tower.
50 |     Returns:
51 |         List of pairs of (gradient, variable) where the gradient has been summed
52 |         across all towers.
53 |     """
54 |     sum_grads = []
55 |     for grad_and_vars in zip(*tower_grads):
56 |         # Note that each grad_and_vars looks like the following:
57 |         # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
58 |         grads = []
59 |         for g, _ in grad_and_vars:
60 |             # Add 0 dimension to the gradients to represent the tower.
61 |             expanded_g = tf.expand_dims(g, 0)
62 | 
63 |             # Append on a 'tower' dimension which we will sum over below.
64 |             grads.append(expanded_g)
65 | 
66 |         # Sum over the 'tower' dimension.
67 |         grad = tf.concat(axis=0, values=grads)
68 |         grad = tf.reduce_sum(grad, 0)
69 | 
70 |         # Keep in mind that the Variables are redundant because they are shared
71 |         # across towers. So .. we will just return the first tower's pointer to
72 |         # the Variable.
73 |         v = grad_and_vars[0][1]
74 |         grad_and_var = (grad, v)
75 |         sum_grads.append(grad_and_var)
76 |     return sum_grads
77 | 
78 | 
79 | if __name__ == '__main__':
80 |     pass
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | """
3 | @author: zeming li
4 | @contact: zengarden2009@gmail.com
5 | @file: __init__.py
6 | """
7 | 
8 | if __name__ == '__main__':
9 |     pass
--------------------------------------------------------------------------------
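As a closing usage note: a minimal, hypothetical sketch (TF1-style; `build_tower_loss` and the two-GPU setup are illustrative, and variable sharing across towers is omitted) of how `average_gradients` from `lib/utils/tf_utils/model_parallel.py` is typically wired into a multi-tower training step:

```python
import tensorflow as tf
from utils.tf_utils.model_parallel import average_gradients  # assumes lib/ is on sys.path

optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

tower_grads = []
for gpu_id in range(2):  # illustrative two-GPU setup
    with tf.device('/gpu:%d' % gpu_id):
        loss = build_tower_loss(gpu_id)  # hypothetical per-tower loss builder
        # one list of (gradient, variable) tuples per tower
        tower_grads.append(optimizer.compute_gradients(loss))

# synchronization point: average each variable's gradient across the towers
train_op = optimizer.apply_gradients(average_gradients(tower_grads))
```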