├── NP-RepMet.pptx ├── NP-RepMet_Poster.pdf ├── Readme.md ├── experiments ├── cfgs │ └── resnet_v1_101_voc0712_trainval_fpn_dcn_oneshot_end2end_ohem_8.yaml ├── fpn_end2end_train_test.py └── fpn_test.py ├── fpn ├── __init__.py ├── __pycache__ │ ├── _init_paths.cpython-36.pyc │ └── _init_paths.cpython-37.pyc ├── _init_paths.py ├── _init_paths.pyc ├── config │ ├── __init__.py │ ├── __init__.pyc │ ├── config.py │ └── config.pyc ├── core │ ├── DataParallelExecutorGroup.py │ ├── DataParallelExecutorGroup.pyc │ ├── __init__.py │ ├── __init__.pyc │ ├── callback.py │ ├── callback.pyc │ ├── loader.py │ ├── loader.pyc │ ├── metric.py │ ├── metric.pyc │ ├── module.py │ ├── module.pyc │ ├── rcnn.py │ ├── rcnn.pyc │ ├── tester.py │ └── tester.pyc ├── few_shot_benchmark_1shot.py ├── few_shot_benchmark_5shot.py ├── function │ ├── __init__.py │ ├── __init__.pyc │ ├── test_rcnn.py │ ├── test_rcnn.pyc │ └── train_rcnn.py ├── operator_py │ ├── __init__.py │ ├── __init__.pyc │ ├── box_annotator_ohem.py │ ├── box_annotator_ohem.pyc │ ├── fpn_roi_pooling.py │ ├── fpn_roi_pooling.pyc │ ├── predict_rois_deltas.py │ ├── proposal_target.py │ ├── proposal_target.pyc │ ├── pyramid_proposal.py │ └── pyramid_proposal.pyc ├── symbols │ ├── __init__.py │ ├── __init__.pyc │ ├── resnet_v1_101_fpn_dcn_rcnn.py │ ├── resnet_v1_101_fpn_dcn_rcnn.pyc │ ├── resnet_v1_101_fpn_dcn_rcnn_oneshot_v3.py │ ├── resnet_v1_101_fpn_dcn_rcnn_oneshot_v3.pyc │ ├── resnet_v1_101_fpn_dcn_rcnn_oneshot_v3_1.py │ ├── resnet_v1_101_fpn_dcn_rcnn_oneshot_v3_org.py │ ├── resnet_v1_101_fpn_dcn_rcnn_rep_noemb.py │ ├── resnet_v1_101_fpn_dcn_rcnn_rep_noemb.pyc │ ├── resnet_v1_101_fpn_rcnn.py │ └── resnet_v1_101_fpn_rcnn.pyc ├── test.py ├── test.pyc ├── train_end2end.py └── train_end2end.pyc ├── lib ├── Makefile ├── __init__.py ├── bbox │ ├── .gitignore │ ├── __init__.py │ ├── __init__.pyc │ ├── bbox.cpython-36m-x86_64-linux-gnu.so │ ├── bbox.pyx │ ├── bbox.so │ ├── bbox_regression.py │ ├── bbox_regression.pyc │ ├── bbox_transform.py │ ├── bbox_transform.pyc │ ├── bbox_utils.py │ ├── bbox_utils.pyc │ ├── setup_linux.py │ └── setup_windows.py ├── dataset │ ├── Defects.py │ ├── Defects.pyc │ ├── JES.py │ ├── JES_docker.py │ ├── JES_scenes.py │ ├── Logo.py │ ├── Logo.pyc │ ├── SCENT.py │ ├── SCENT.pyc │ ├── __init__.py │ ├── __init__.pyc │ ├── cityscape.py │ ├── coco.py │ ├── coco.pyc │ ├── ds_utils.py │ ├── ds_utils.pyc │ ├── imagenet.py │ ├── imagenet.pyc │ ├── imagenet_voc_eval.py │ ├── imagenet_voc_eval.pyc │ ├── imdb.py │ ├── imdb.pyc │ ├── pascal_voc.py │ ├── pascal_voc.pyc │ ├── pascal_voc_eval.py │ ├── pascal_voc_eval.pyc │ └── pycocotools │ │ ├── .gitignore │ │ ├── UPSTREAM_REV │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── _mask.cpython-36m-x86_64-linux-gnu.so │ │ ├── _mask.pyx │ │ ├── _mask.so │ │ ├── coco.py │ │ ├── coco.pyc │ │ ├── cocoeval.py │ │ ├── cocoeval.pyc │ │ ├── mask.py │ │ ├── mask.pyc │ │ ├── maskApi.c │ │ ├── maskApi.h │ │ ├── setup_linux.py │ │ └── setup_windows.py ├── mask │ ├── __init__.py │ ├── __init__.pyc │ ├── mask_transform.py │ └── mask_transform.pyc ├── nms │ ├── __init__.py │ ├── __init__.pyc │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── cpu_nms.so │ ├── gpu_nms.cpp │ ├── gpu_nms.cu │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ ├── gpu_nms.so │ ├── gpu_nms_10.so │ ├── gpu_nms_8.so │ ├── gpu_nms_9.so │ ├── nms.py │ ├── nms.pyc │ ├── nms_kernel.cu │ ├── setup_linux.py │ ├── setup_windows.py │ └── setup_windows_cuda.py ├── rpn │ ├── __init__.py │ ├── __init__.pyc │ ├── generate_anchor.py │ ├── generate_anchor.pyc │ ├── rpn.py │ └── rpn.pyc 
├── segmentation │ ├── __init__.py │ └── segmentation.py └── utils │ ├── JES3D_transform.py │ ├── JES3D_transform_utils.py │ ├── ObjDetStats.py │ ├── PerfClassStats.py │ ├── PerfStats.py │ ├── PrefetchingIter.py │ ├── PrefetchingIter.pyc │ ├── __init__.py │ ├── __init__.pyc │ ├── combine_model.py │ ├── constant_symbol.py │ ├── constant_symbol.pyc │ ├── create_logger.py │ ├── create_logger.pyc │ ├── get_image_data.py │ ├── image.py │ ├── image.pyc │ ├── image_processing.py │ ├── load_data.py │ ├── load_data.pyc │ ├── load_data_ext.py │ ├── load_model.py │ ├── load_model.pyc │ ├── lr_scheduler.py │ ├── lr_scheduler.pyc │ ├── mask_coco2voc.py │ ├── mask_coco2voc.pyc │ ├── mask_voc2coco.py │ ├── mask_voc2coco.pyc │ ├── miscellaneous.py │ ├── miscellaneous.pyc │ ├── roidb.py │ ├── save_model.py │ ├── show_boxes.py │ ├── show_boxes.pyc │ ├── show_masks.py │ ├── show_offset.py │ ├── symbol.py │ ├── symbol.pyc │ ├── tfs_vis.py │ ├── tfs_vis.pyc │ ├── tictoc.py │ └── tictoc.pyc └── output └── benchmarks ├── RepMet_inloc ├── RepMet_inloc_1shot_5way_10qpc_500epi │ └── 1shot.log └── RepMet_inloc_5shot_5way_10qpc_500epi │ └── 5shot.log ├── RepMet_inloc_10shot_5way_10qpc_500epi_episodes.npz ├── RepMet_inloc_1shot_5way_10qpc_500epi_episodes.npz └── RepMet_inloc_5shot_5way_10qpc_500epi_episodes.npz /NP-RepMet.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/NP-RepMet.pptx -------------------------------------------------------------------------------- /NP-RepMet_Poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/NP-RepMet_Poster.pdf -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ### Introduction 2 | This is the codebase for the NeurIPS 2020 paper "**Restoring Negative Information in Few-Shot Object Detection**". 3 | The code will be continuously improved. For any questions/issues, please open an issue in this repository or email me at . 4 | ### Requirements 5 | The codebase is modified on the basis of RepMet (). It is built based on Python 2.7, MXNet 1.5.1, and CUDA 10.0.130. Other packages include matplotlib, opencv-python, PyYAML, etc. You may need to change some file paths to run the code. 6 | ### Preparing Data 7 | #### Dataset 8 | wget -c http://image-net.org/image/ILSVRC2017/ILSVRC2017_CLS-LOC.tar.gz 9 | #### Data and Pre-trained Model 10 | Please download the data and pre-trained model from and put it in the root directory. 
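After downloading, it can help to check that everything landed where the training config expects it. A minimal sketch (the paths below are assumptions read off `experiments/cfgs/resnet_v1_101_voc0712_trainval_fpn_dcn_oneshot_end2end_ohem_8.yaml`; edit them, and the `dataset_path`/`pretrained` entries in that file, to match your machine):

```python
import os.path as osp

# Assumed locations, taken from the training YAML -- adjust to your layout.
checks = [
    './data/fpn_dcn_coco-0000.params',                  # network.pretrained + pretrained_epoch
    './data/Imagenet_LOC/Pascal_inloc_cls2id_map.pkl',  # cls_filter_files
    '/path/to/VOCdevkit',                               # first entry of dataset.dataset_path
    '/path/to/imagenet/ILSVRC',                         # second entry of dataset.dataset_path
]
for p in checks:
    print('%-55s %s' % (p, 'ok' if osp.exists(p) else 'MISSING'))
```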
11 | ### Code execution 12 | #### NP-RepMet Evaluation: 13 | To reproduce the 1-shot, 5-way experiment with NP-RepMet from the NeurIPS paper, run 14 | `python fpn/few_shot_benchmark_1shot.py --test_name=RepMet_inloc --Nshot=1 --Nway=5 --Nquery_cat=10 --Nepisodes=500` 15 | 16 | To reproduce the 5-shot, 5-way experiment with NP-RepMet from the NeurIPS paper, run 17 | `python fpn/few_shot_benchmark_5shot.py --test_name=RepMet_inloc --Nshot=5 --Nway=5 --Nquery_cat=10 --Nepisodes=500` 18 | #### NP-RepMet Training: 19 | To train the model from scratch, run 20 | `python ./experiments/fpn_end2end_train_test.py --cfg=./experiments/cfgs/resnet_v1_101_voc0712_trainval_fpn_dcn_oneshot_end2end_ohem_8.yaml` -------------------------------------------------------------------------------- /experiments/cfgs/resnet_v1_101_voc0712_trainval_fpn_dcn_oneshot_end2end_ohem_8.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | MXNET_VERSION: "mxnet" 3 | output_path: "./output/fpn/voc_imagenet" 4 | symbol: resnet_v1_101_fpn_dcn_rcnn_oneshot_v3 5 | gpus: '0' #'0,1,2,3' #'0,1,5,7' 6 | CLASS_AGNOSTIC: true 7 | SCALES: 8 | - 800 9 | - 1280 10 | TEST_SCALES: [[800, 1280]] # single scale testing 11 | #TEST_SCALES: [[480, 800], [576, 900], [688, 1100], [800, 1200], [1200, 1600], [1400, 2000]] # multi-scale testing 12 | default: 13 | frequent: 10 14 | kvstore: device 15 | network: 16 | pretrained: "./data/fpn_dcn_coco" 17 | pretrained_epoch: 0 18 | pretrained_weights_are_priority: true 19 | PIXEL_MEANS: 20 | - 103.06 21 | - 115.90 22 | - 123.15 23 | IMAGE_STRIDE: 32 24 | RCNN_FEAT_STRIDE: 16 25 | RPN_FEAT_STRIDE: 26 | - 4 27 | - 8 28 | - 16 29 | - 32 30 | - 64 31 | FIXED_PARAMS: 32 | - conv1 33 | - bn_conv1 34 | - res2 35 | - bn2 36 | - gamma 37 | - beta 38 | FIXED_PARAMS_SHARED: 39 | - conv1 40 | - bn_conv1 41 | - res2 42 | - bn2 43 | - res3 44 | - bn3 45 | - res4 46 | - bn4 47 | - gamma 48 | - beta 49 | ANCHOR_RATIOS: 50 | - 0.5 51 | - 1 52 | - 2 53 | ANCHOR_SCALES: 54 | - 8 55 | NUM_ANCHORS: 3 56 | EMBEDDING_DIM: 256 57 | REPS_PER_CLASS: 5 58 | SIGMA: 0.5 59 | EMBED_LOSS_ENABLED: True 60 | EMBED_LOSS_MARGIN: 0.5 61 | SOFTMAX_ENABLED: True 62 | REP_L2_NORM: True 63 | EMBED_L2_NORM: True 64 | SEPARABLE_REPS: False 65 | REPS_CLS_LOSS: False 66 | SEPARABLE_REPS_INIT: True 67 | ADDITIONAL_LINEAR_CLS_LOSS: True 68 | dataset: 69 | NUM_CLASSES: 122 70 | balance_classes: true 71 | num_ex_per_class: 200 72 | max_num_extra_classes: 122 73 | dataset: PascalVOC;ImageNet 74 | #dataset_path: "/dccstor/leonidka1/data/VOCdevkit;/dccstor/leonidka1/data/imagenet/ILSVRC" 75 | dataset_path: "/home/v-yukyan/yangyk/dataset/VOCdevkit;/home/v-yukyan/yangyk/dataset/imagenet/ILSVRC" 76 | cls_filter_files: './data/Imagenet_LOC/Pascal_inloc_cls2id_map.pkl:./data/Imagenet_LOC/Pascal_inloc_first101_categories.txt' 77 | image_set: 2007_trainval+2012_trainval;train_loc 78 | per_category_epoch_max: 0;10 79 | root_path: "./data" 80 | test_image_set: ;val_partial 81 | proposal: rpn 82 | TRAIN: 83 | UPDATE_REPS_VIA_CLUSTERING: true 84 | UPDATE_REPS_STOP_EPOCH: 3 85 | UPDATE_REPS_START_EPOCH: 9 86 | 87 | NUMEX_FOR_CLUSTERING: 200 88 | REPS_LR_MULT: 1 #0.01 89 | lr: 0.01 #0.001 90 | warmup_lr: 0.001 91 | warmup_step: 250 92 | warmup: true 93 | # lr: 0.000001 94 | lr_step: '4,6,20,30' 95 | wd: 0.0001 96 | begin_epoch: 0 97 | end_epoch: 30 98 | model_prefix: 'fpn_pascal_imagenet' 99 | # whether resume training 100 | RESUME: false 101 | # whether flip image 102 | FLIP: false 103 | # whether shuffle image
104 | SHUFFLE: true 105 | # whether use OHEM 106 | ENABLE_OHEM: true 107 | # size of images for each device, 2 for rcnn, 1 for rpn and e2e 108 | BATCH_IMAGES: 1 109 | # e2e changes behavior of anchor loader and metric 110 | END2END: true 111 | # group images with similar aspect ratio 112 | ASPECT_GROUPING: true 113 | # R-CNN 114 | # rcnn rois batch size 115 | BATCH_ROIS: -1 116 | BATCH_ROIS_OHEM: 512 #128 117 | # rcnn rois sampling params 118 | FG_FRACTION: 0.25 119 | FG_THRESH: 0.5 120 | BG_THRESH_HI: 0.5 121 | BG_THRESH_LO: 0.0 122 | # rcnn bounding box regression params 123 | BBOX_REGRESSION_THRESH: 0.5 124 | BBOX_WEIGHTS: 125 | - 1.0 126 | - 1.0 127 | - 1.0 128 | - 1.0 129 | # RPN anchor loader 130 | # rpn anchors batch size 131 | RPN_BATCH_SIZE: 256 132 | # rpn anchors sampling params 133 | RPN_FG_FRACTION: 0.5 134 | RPN_POSITIVE_OVERLAP: 0.7 135 | RPN_NEGATIVE_OVERLAP: 0.3 136 | RPN_CLOBBER_POSITIVES: false 137 | # rpn bounding box regression params 138 | RPN_BBOX_WEIGHTS: 139 | - 1.0 140 | - 1.0 141 | - 1.0 142 | - 1.0 143 | RPN_POSITIVE_WEIGHT: -1.0 144 | # used for end2end training 145 | # RPN proposal 146 | CXX_PROPOSAL: false 147 | RPN_NMS_THRESH: 0.7 148 | RPN_PRE_NMS_TOP_N: 12000 149 | RPN_POST_NMS_TOP_N: 2000 150 | RPN_MIN_SIZE: 0 151 | # approximate bounding box regression 152 | BBOX_NORMALIZATION_PRECOMPUTED: true 153 | BBOX_MEANS: 154 | - 0.0 155 | - 0.0 156 | - 0.0 157 | - 0.0 158 | BBOX_STDS: 159 | - 0.1 160 | - 0.1 161 | - 0.2 162 | - 0.2 163 | TEST: 164 | # use rpn to generate proposal 165 | HAS_RPN: true 166 | # size of images for each device 167 | BATCH_IMAGES: 1 168 | # RPN proposal 169 | CXX_PROPOSAL: false 170 | RPN_NMS_THRESH: 0.7 171 | RPN_PRE_NMS_TOP_N: 12000 172 | RPN_POST_NMS_TOP_N: 2000 173 | RPN_MIN_SIZE: 0 174 | # RPN generate proposal 175 | PROPOSAL_NMS_THRESH: 0.7 176 | PROPOSAL_PRE_NMS_TOP_N: 20000 177 | PROPOSAL_POST_NMS_TOP_N: 2000 178 | PROPOSAL_MIN_SIZE: 0 179 | # RCNN nms 180 | NMS: 0.3 181 | USE_SOFTNMS: true 182 | SOFTNMS_THRESH: 0.6 183 | test_epoch: 15 184 | max_per_image: 100 185 | -------------------------------------------------------------------------------- /experiments/fpn_end2end_train_test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | import os 8 | import sys 9 | os.environ['PYTHONUNBUFFERED'] = '1' 10 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 11 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 12 | 13 | # os.environ['MXNET_GPU_WORKER_NTHREADS'] = '4' 14 | # os.environ['MXNET_GPU_COPY_NTHREADS'] = '4' 15 | # os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' 16 | 17 | this_dir = os.path.dirname(__file__) 18 | sys.path.insert(0, os.path.join(this_dir, '..', 'fpn')) 19 | 20 | import matplotlib 21 | matplotlib.use('agg') 22 | 23 | import train_end2end 24 | import test 25 | 26 | # leonid: MXNet warmup 27 | import mxnet as mx 28 | #mxnetwarmup = mx.nd.ones((1,1), mx.gpu(0)) 29 | 30 | if __name__ == "__main__": 31 | train_end2end.main() 32 | test.main() 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /experiments/fpn_test.py:
-------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Licensed under The Apache-2.0 License [see LICENSE for details] 5 | # Modified by Guodong Zhang 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | import sys 10 | os.environ['PYTHONUNBUFFERED'] = '1' 11 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' 12 | os.environ['MXNET_ENABLE_GPU_P2P'] = '0' 13 | # os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine' 14 | this_dir = os.path.dirname(__file__) 15 | sys.path.insert(0, os.path.join(this_dir, '..', 'fpn')) 16 | 17 | import test 18 | 19 | # leonid: MXNet warmup 20 | import mxnet as mx 21 | mxnetwarmup = mx.nd.ones((1,1), mx.gpu(0)) 22 | 23 | if __name__ == "__main__": 24 | test.main() 25 | -------------------------------------------------------------------------------- /fpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/__init__.py -------------------------------------------------------------------------------- /fpn/__pycache__/_init_paths.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/__pycache__/_init_paths.cpython-36.pyc -------------------------------------------------------------------------------- /fpn/__pycache__/_init_paths.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/__pycache__/_init_paths.cpython-37.pyc -------------------------------------------------------------------------------- /fpn/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | lib_path = osp.join(this_dir, '..', 'lib') 11 | add_path(lib_path) 12 | -------------------------------------------------------------------------------- /fpn/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/_init_paths.pyc -------------------------------------------------------------------------------- /fpn/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/config/__init__.py -------------------------------------------------------------------------------- /fpn/config/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/config/__init__.pyc -------------------------------------------------------------------------------- /fpn/config/config.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/config/config.pyc
-------------------------------------------------------------------------------- /fpn/core/DataParallelExecutorGroup.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/DataParallelExecutorGroup.pyc -------------------------------------------------------------------------------- /fpn/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/__init__.py -------------------------------------------------------------------------------- /fpn/core/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/__init__.pyc -------------------------------------------------------------------------------- /fpn/core/callback.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified by Yuwen Xiong 8 | # -------------------------------------------------------- 9 | 10 | import time 11 | import logging 12 | import mxnet as mx 13 | 14 | 15 | class Speedometer(object): 16 | def __init__(self, batch_size, frequent=50): 17 | self.batch_size = batch_size 18 | self.frequent = frequent 19 | self.init = False 20 | self.tic = 0 21 | self.last_count = 0 22 | 23 | def __call__(self, param): 24 | """Callback to Show speed.""" 25 | count = param.nbatch 26 | if self.last_count > count: 27 | self.init = False 28 | self.last_count = count 29 | 30 | if self.init: 31 | if count % self.frequent == 0: 32 | speed = self.frequent * self.batch_size / (time.time() - self.tic) 33 | s = '' 34 | if param.eval_metric is not None: 35 | name, value = param.eval_metric.get() 36 | s = "Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-" % (param.epoch, count, speed) 37 | for n, v in zip(name, value): 38 | s += "%s=%f,\t" % (n, v) 39 | else: 40 | s = "Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec" % (param.epoch, count, speed) 41 | 42 | logging.info(s) 43 | print(s) 44 | self.tic = time.time() 45 | else: 46 | self.init = True 47 | self.tic = time.time() 48 | 49 | 50 | def do_checkpoint(prefix, means, stds): 51 | def _callback(iter_no, sym, arg, aux): 52 | arg['bbox_pred_weight_test'] = (arg['bbox_pred_weight'].T * mx.nd.array(stds)).T 53 | arg['bbox_pred_bias_test'] = arg['bbox_pred_bias'] * mx.nd.array(stds) + mx.nd.array(means) 54 | mx.model.save_checkpoint(prefix, iter_no + 1, sym, arg, aux) 55 | arg.pop('bbox_pred_weight_test') 56 | arg.pop('bbox_pred_bias_test') 57 | return _callback 58 | -------------------------------------------------------------------------------- /fpn/core/callback.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/callback.pyc -------------------------------------------------------------------------------- /fpn/core/loader.pyc: -------------------------------------------------------------------------------- 
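The `do_checkpoint` callback above saves an extra `bbox_pred_weight_test`/`bbox_pred_bias_test` pair because training regresses *normalized* box deltas (see `BBOX_MEANS`/`BBOX_STDS` in the config); folding the de-normalization into the weights lets the test-time symbol use the raw `FullyConnected` output directly. A quick numpy check of that identity (an illustrative sketch, not repo code; a single class-agnostic 4-delta regressor is assumed):

```python
import numpy as np

# Training predicts normalized deltas t_norm = (t - means) / stds, so at test
# time t = (x @ W + b) * stds + means = x @ (W * stds) + (b * stds + means).
rng = np.random.RandomState(0)
x = rng.randn(3, 8)                    # 3 RoI feature vectors
W, b = rng.randn(8, 4), rng.randn(4)   # bbox_pred weight / bias
means = np.zeros(4)
stds = np.array([0.1, 0.1, 0.2, 0.2])  # BBOX_STDS from the config above

denorm_after = (x.dot(W) + b) * stds + means   # de-normalize the output
folded = x.dot(W * stds) + (b * stds + means)  # fold into the weights instead
assert np.allclose(denorm_after, folded)       # identical predictions
```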
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/loader.pyc -------------------------------------------------------------------------------- /fpn/core/metric.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/metric.pyc -------------------------------------------------------------------------------- /fpn/core/module.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/module.pyc -------------------------------------------------------------------------------- /fpn/core/rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified by Yuwen Xiong 8 | # -------------------------------------------------------- 9 | """ 10 | Fast R-CNN: 11 | data = 12 | {'data': [num_images, c, h, w], 13 | 'rois': [num_rois, 5]} 14 | label = 15 | {'label': [num_rois], 16 | 'bbox_target': [num_rois, 4 * num_classes], 17 | 'bbox_weight': [num_rois, 4 * num_classes]} 18 | roidb extended format [image_index] 19 | ['image', 'height', 'width', 'flipped', 20 | 'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps', 'bbox_targets'] 21 | """ 22 | 23 | import numpy as np 24 | import numpy.random as npr 25 | 26 | from utils.image import get_image, tensor_vstack 27 | from bbox.bbox_transform import bbox_overlaps, bbox_transform 28 | from bbox.bbox_regression import expand_bbox_regression_targets 29 | 30 | 31 | def get_rcnn_testbatch(roidb, cfg): 32 | """ 33 | return a dict of testbatch 34 | :param roidb: ['image', 'flipped'] + ['boxes'] 35 | :return: data, label, im_info 36 | """ 37 | # assert len(roidb) == 1, 'Single batch only' 38 | imgs, roidb = get_image(roidb, cfg) 39 | im_array = imgs 40 | im_info = [np.array([roidb[i]['im_info']], dtype=np.float32) for i in range(len(roidb))] 41 | 42 | im_rois = [roidb[i]['boxes'] for i in range(len(roidb))] 43 | rois = im_rois 44 | rois_array = [np.hstack((0 * np.ones((rois[i].shape[0], 1)), rois[i])) for i in range(len(rois))] 45 | 46 | data = [{'data': im_array[i], 47 | 'rois': rois_array[i]} for i in range(len(roidb))] 48 | label = {} 49 | 50 | return data, label, im_info 51 | 52 | 53 | def get_rcnn_batch(roidb, cfg): 54 | """ 55 | return a dict of multiple images 56 | :param roidb: a list of dict, whose length controls batch size 57 | ['images', 'flipped'] + ['gt_boxes', 'boxes', 'gt_overlap'] => ['bbox_targets'] 58 | :return: data, label 59 | """ 60 | num_images = len(roidb) 61 | imgs, roidb = get_image(roidb, cfg) 62 | im_array = tensor_vstack(imgs) 63 | 64 | assert cfg.TRAIN.BATCH_ROIS == -1 or cfg.TRAIN.BATCH_ROIS % cfg.TRAIN.BATCH_IMAGES == 0, \ 65 | 'BATCHIMAGES {} must divide BATCH_ROIS {}'.format(cfg.TRAIN.BATCH_IMAGES, cfg.TRAIN.BATCH_ROIS) 66 | 67 | if cfg.TRAIN.BATCH_ROIS == -1: 68 | rois_per_image = np.sum([iroidb['boxes'].shape[0] for iroidb in roidb]) 69 | fg_rois_per_image = rois_per_image 70 | else: 71 | rois_per_image = cfg.TRAIN.BATCH_ROIS / cfg.TRAIN.BATCH_IMAGES 72 | fg_rois_per_image = 
np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int) 73 | 74 | rois_array = list() 75 | labels_array = list() 76 | bbox_targets_array = list() 77 | bbox_weights_array = list() 78 | 79 | for im_i in range(num_images): 80 | roi_rec = roidb[im_i] 81 | 82 | # infer num_classes from gt_overlaps 83 | num_classes = roi_rec['gt_overlaps'].shape[1] 84 | 85 | # label = class RoI has max overlap with 86 | rois = roi_rec['boxes'] 87 | labels = roi_rec['max_classes'] 88 | overlaps = roi_rec['max_overlaps'] 89 | bbox_targets = roi_rec['bbox_targets'] 90 | 91 | im_rois, labels, neg_labels, bbox_targets, bbox_weights = \ 92 | sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg, 93 | labels, overlaps, bbox_targets) 94 | 95 | # project im_rois 96 | # do not round roi 97 | rois = im_rois 98 | batch_index = im_i * np.ones((rois.shape[0], 1)) 99 | rois_array_this_image = np.hstack((batch_index, rois)) 100 | rois_array.append(rois_array_this_image) 101 | 102 | # add labels 103 | labels_array.append(labels) 104 | bbox_targets_array.append(bbox_targets) 105 | bbox_weights_array.append(bbox_weights) 106 | 107 | rois_array = np.array(rois_array) 108 | labels_array = np.array(labels_array) 109 | bbox_targets_array = np.array(bbox_targets_array) 110 | bbox_weights_array = np.array(bbox_weights_array) 111 | 112 | data = {'data': im_array, 113 | 'rois': rois_array} 114 | label = {'label': labels_array, 115 | 'bbox_target': bbox_targets_array, 116 | 'bbox_weight': bbox_weights_array} 117 | 118 | return data, label 119 | 120 | 121 | def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg, 122 | labels=None, overlaps=None, bbox_targets=None, gt_boxes=None): 123 | """ 124 | generate random sample of ROIs comprising foreground and background examples 125 | :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index 126 | :param fg_rois_per_image: foreground roi number 127 | :param rois_per_image: total roi number 128 | :param num_classes: number of classes 129 | :param labels: maybe precomputed 130 | :param overlaps: maybe precomputed (max_overlaps) 131 | :param bbox_targets: maybe precomputed 132 | :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls) 133 | :return: (rois, labels, neg_labels, bbox_targets, bbox_weights) 134 | """ 135 | if labels is None: 136 | overlaps = bbox_overlaps(rois[:, 1:].astype(np.float), gt_boxes[:, :4].astype(np.float)) 137 | gt_assignment = overlaps.argmax(axis=1) 138 | overlaps = overlaps.max(axis=1) 139 | labels = gt_boxes[gt_assignment, 4] 140 | 141 | 142 | ''' 143 | #yangyk 144 | print('gt_boxes:',gt_boxes[:,4]) 145 | print('gt_assignment:',gt_assignment) 146 | print('labels:',labels) 147 | print('rois shape:',rois.shape,'overlaps shape:',overlaps.shape,'labels shape',labels.shape) 148 | 149 | ''' 150 | 151 | 152 | 153 | # foreground RoI with FG_THRESH overlap 154 | fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 155 | 156 | 157 | 158 | # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs 159 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size) 160 | # Sample foreground regions without replacement 161 | if len(fg_indexes) > fg_rois_per_this_image: 162 | fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False) 163 | 164 | 165 | debug = False 166 | if debug: 167 | #yangyk 168 | print('fg_indexes size:', fg_indexes.size, 'fg_rois_per_image:',fg_rois_per_image, 'fg_rois_per_this_image:', fg_rois_per_this_image) 169 | 170 | 171 | # Select background RoIs as those within
[BG_THRESH_LO, BG_THRESH_HI) 172 | bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 173 | # Compute number of background RoIs to take from this image (guarding against there being fewer than desired) 174 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 175 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size) 176 | # Sample background regions without replacement 177 | if len(bg_indexes) > bg_rois_per_this_image: 178 | bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False) 179 | 180 | # indexes selected 181 | keep_indexes = np.append(fg_indexes, bg_indexes) 182 | 183 | #print('fg_over_laps:', overlaps[fg_indexes]) 184 | 185 | # pad more to ensure a fixed minibatch size 186 | while keep_indexes.shape[0] < rois_per_image: 187 | gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0]) 188 | gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False) 189 | keep_indexes = np.append(keep_indexes, gap_indexes) 190 | 191 | 192 | # select labels 193 | labels = labels[keep_indexes] 194 | #yangyk 195 | labels_all = labels.copy() 196 | 197 | # set labels of bg_rois to be 0 198 | labels[fg_rois_per_this_image:] = 0 199 | rois = rois[keep_indexes] 200 | 201 | #print('labels:',labels) 202 | 203 | # load or compute bbox_target 204 | if bbox_targets is not None: 205 | bbox_target_data = bbox_targets[keep_indexes, :] 206 | else: 207 | targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4]) 208 | if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: 209 | targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) 210 | / np.array(cfg.TRAIN.BBOX_STDS)) 211 | bbox_target_data = np.hstack((labels[:, np.newaxis], targets)) 212 | 213 | #yangyk 214 | overlaps = overlaps[keep_indexes] 215 | #print('fg_over_laps:', overlaps[:fg_rois_per_this_image]) 216 | 217 | neg_low = 0.0 218 | neg_middle = 0.2 219 | neg_high = 0.3 220 | neg_indexes_L1 = np.where((overlaps < neg_middle) & (overlaps >= neg_low))[0] 221 | neg_indexes_L2 = np.where((overlaps < neg_high) & (overlaps >= neg_middle))[0] 222 | neg_indexes_L3 = np.where(overlaps >= neg_high)[0] 223 | 224 | neg_labels = np.zeros(labels.shape) 225 | 226 | 227 | #print(neg_indexes_L2) 228 | neg_labels[neg_indexes_L2] = labels_all[neg_indexes_L2] 229 | 230 | if debug: 231 | print('neg_indexes_L1:', len(neg_indexes_L1), 'neg_indexes_L2:', len(neg_indexes_L2), 'neg_indexes_L3',len(neg_indexes_L3)) 232 | print('labels_all:',labels_all) 233 | print('neg_labels:',neg_labels,'neg_labels_shape:',neg_labels.shape) 234 | #print(neg_labels[neg_indexes_L2]) 235 | print('<<>>>',neg_labels[neg_indexes_L2]) 236 | print('fg neg labels sum',np.sum(neg_labels[neg_indexes_L2])) 237 | print('neg labels sum', np.sum(neg_labels)) 238 | print('over_laps:',overlaps) 239 | print('neg_fg_over_laps:', overlaps[neg_indexes_L2]) 240 | print('<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>') 241 | 242 | 243 | bbox_targets, bbox_weights = \ 244 | expand_bbox_regression_targets(bbox_target_data, num_classes, cfg) 245 | 246 | 247 | return rois, labels, neg_labels, bbox_targets, bbox_weights 248 | 249 | -------------------------------------------------------------------------------- /fpn/core/rcnn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/rcnn.pyc
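The `neg_labels` returned by `sample_rois` above are where negative information gets restored: an RoI whose best IoU falls in the middle band keeps the class label of its matched ground-truth box, so it can later serve as a class-specific negative example. A standalone numpy sketch of just that binning, using the thresholds hard-coded above (0.0 / 0.2 / 0.3) with illustrative values:

```python
import numpy as np

overlaps = np.array([0.05, 0.22, 0.28, 0.45, 0.60])  # IoU with best-matching GT box
labels_all = np.array([3, 3, 7, 7, 7])               # class of that GT box

neg_low, neg_middle, neg_high = 0.0, 0.2, 0.3
neg_labels = np.zeros_like(labels_all)
# L2 band: overlaps an object enough to act as a class-specific hard negative
idx_L2 = np.where((overlaps >= neg_middle) & (overlaps < neg_high))[0]
neg_labels[idx_L2] = labels_all[idx_L2]
print(neg_labels)  # -> [0 3 7 0 0]
```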
-------------------------------------------------------------------------------- /fpn/core/tester.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/core/tester.pyc -------------------------------------------------------------------------------- /fpn/function/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/function/__init__.py -------------------------------------------------------------------------------- /fpn/function/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/function/__init__.pyc -------------------------------------------------------------------------------- /fpn/function/test_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Guodong Zhang 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import pprint 11 | import logging 12 | import time 13 | import os 14 | import mxnet as mx 15 | 16 | from symbols import * 17 | from dataset import * 18 | from core.loader import TestLoader 19 | from core.tester import Predictor, pred_eval 20 | from utils.load_model import load_param 21 | 22 | 23 | def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, 24 | ctx, prefix, epoch, 25 | vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None,nms_dets=None,is_docker=False): 26 | if not logger: 27 | assert False, 'require a logger' 28 | 29 | # print cfg 30 | #pprint.pprint(cfg) 31 | #logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) 32 | 33 | #leonid: added to support ; for multi-dataset listing - this is a temp solution allowing just one DB in test 34 | datasets = dataset.split(';') 35 | dataset_paths = dataset_path.split(';') 36 | imagesets = image_set.split(';') 37 | output_paths = output_path.split(';') 38 | categ_index_offs = 20 #TODO: remove 39 | for dataset, dataset_path, image_set,output_path in zip(datasets,dataset_paths,imagesets,output_paths): 40 | if len(image_set.strip())<=0: 41 | continue 42 | 43 | if 'classes_list_fname' not in cfg.dataset: 44 | classes_list_fname = '' 45 | else: 46 | classes_list_fname = cfg.dataset.classes_list_fname 47 | 48 | # load symbol and testing data 49 | if has_rpn: 50 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 51 | sym = sym_instance.get_symbol(cfg, is_train=False) 52 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path,classes_list_fname=classes_list_fname,categ_index_offs=categ_index_offs) 53 | roidb = imdb.gt_roidb() 54 | else: 55 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 56 | sym = sym_instance.get_symbol_rcnn(cfg, is_train=False) 57 | imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) 58 | gt_roidb = imdb.gt_roidb() 59 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb) 60 | categ_index_offs+=imdb.num_classes 61 | # get test data iter 62 | test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn) 63 | 64 | if not is_docker: 65 | # load model 66 | arg_params, aux_params = load_param(prefix, epoch, process=True) 67 | 68 | # infer shape 69 | data_shape_dict = dict(test_data.provide_data_single) 70 | sym_instance.infer_shape(data_shape_dict) 71 | 72 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) 73 | 74 | # decide maximum shape 75 | data_names = [k[0] for k in test_data.provide_data_single] 76 | label_names = None 77 | max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] 78 | if not has_rpn: 79 | max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) 80 | 81 | # create predictor 82 | predictor = Predictor(sym, data_names, label_names, 83 | context=ctx, max_data_shapes=max_data_shape, 84 | provide_data=test_data.provide_data, provide_label=test_data.provide_label, 85 | arg_params=arg_params, aux_params=aux_params) 86 | 87 | #make sure imdb and detector have the same number of classes 88 | #imdb.num_classes=min(imdb.num_classes,cfg.dataset.NUM_CLASSES) # JS, March 2019: the JES dataset class produces num_classes = number of foreground classes, while the tester assumes this includes the background. 89 | imdb.num_classes =cfg.dataset.NUM_CLASSES 90 | else: 91 | predictor=None 92 | 93 | # start detection 94 | pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger,nms_dets=nms_dets) 95 | 96 | -------------------------------------------------------------------------------- /fpn/function/test_rcnn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/function/test_rcnn.pyc -------------------------------------------------------------------------------- /fpn/function/train_rcnn.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Modified by Guodong Zhang 7 | # -------------------------------------------------------- 8 | 9 | import argparse 10 | import logging 11 | import pprint 12 | import os 13 | import mxnet as mx 14 | import numpy as np 15 | 16 | from symbols import * 17 | from core import callback, metric 18 | from core.loader import ROIIter 19 | from core.module import MutableModule 20 | from bbox.bbox_regression import add_bbox_regression_targets 21 | from utils.load_data import load_proposal_roidb, merge_roidb, filter_roidb 22 | from utils.load_model import load_param 23 | from utils.PrefetchingIter import PrefetchingIter 24 | from utils.lr_scheduler import WarmupMultiFactorScheduler 25 | 26 | 27 | def train_rcnn(cfg, dataset, image_set, root_path, dataset_path, 28 | frequent, kvstore, flip, shuffle, resume, 29 | ctx, pretrained, epoch, prefix, begin_epoch, end_epoch, 30 | train_shared, lr, lr_step, proposal, logger=None, output_path=None): 31 | mx.random.seed(np.random.randint(10000)) 32 | np.random.seed(np.random.randint(10000)) 33 | # set up logger 34 | if not logger: 35 | logging.basicConfig() 36 | logger = 
logging.getLogger() 37 | logger.setLevel(logging.INFO) 38 | 39 | # load symbol 40 | sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() 41 | sym = sym_instance.get_symbol_rcnn(cfg, is_train=True) 42 | 43 | # setup multi-gpu 44 | batch_size = len(ctx) 45 | input_batch_size = cfg.TRAIN.BATCH_IMAGES * batch_size 46 | 47 | # print cfg 48 | pprint.pprint(cfg) 49 | logger.info('training rcnn cfg:{}\n'.format(pprint.pformat(cfg))) 50 | 51 | # load dataset and prepare imdb for training 52 | image_sets = [iset for iset in image_set.split('+')] 53 | roidbs = [load_proposal_roidb(dataset, image_set, root_path, dataset_path, 54 | proposal=proposal, append_gt=True, flip=flip, result_path=output_path) 55 | for image_set in image_sets] 56 | roidb = merge_roidb(roidbs) 57 | roidb = filter_roidb(roidb, cfg) 58 | means, stds = add_bbox_regression_targets(roidb, cfg) 59 | 60 | # load training data 61 | train_data = ROIIter(roidb, cfg, batch_size=input_batch_size, shuffle=shuffle, 62 | ctx=ctx, aspect_grouping=cfg.TRAIN.ASPECT_GROUPING) 63 | 64 | # infer max shape 65 | max_data_shape = [('data', (cfg.TRAIN.BATCH_IMAGES, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))] 66 | 67 | # infer shape 68 | data_shape_dict = dict(train_data.provide_data_single + train_data.provide_label_single) 69 | sym_instance.infer_shape(data_shape_dict) 70 | 71 | # load and initialize params 72 | if resume: 73 | print('continue training from ', begin_epoch) 74 | arg_params, aux_params = load_param(prefix, begin_epoch, convert=True) 75 | else: 76 | arg_params, aux_params = load_param(pretrained, epoch, convert=True) 77 | sym_instance.init_weight_rcnn(cfg, arg_params, aux_params) 78 | 79 | # check parameter shapes 80 | sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict) 81 | 82 | # prepare training 83 | # create solver 84 | data_names = [k[0] for k in train_data.provide_data_single] 85 | label_names = [k[0] for k in train_data.provide_label_single] 86 | if train_shared: 87 | fixed_param_prefix = cfg.network.FIXED_PARAMS_SHARED 88 | else: 89 | fixed_param_prefix = cfg.network.FIXED_PARAMS 90 | mod = MutableModule(sym, data_names=data_names, label_names=label_names, 91 | logger=logger, context=ctx, 92 | max_data_shapes=[max_data_shape for _ in range(batch_size)], fixed_param_prefix=fixed_param_prefix) 93 | 94 | if cfg.TRAIN.RESUME: 95 | mod._preload_opt_states = '%s-%04d.states'%(prefix, begin_epoch) 96 | 97 | 98 | # decide training params 99 | # metric 100 | eval_metric = metric.RCNNAccMetric(cfg) 101 | cls_metric = metric.RCNNLogLossMetric(cfg) 102 | bbox_metric = metric.RCNNL1LossMetric(cfg) 103 | eval_metrics = mx.metric.CompositeEvalMetric() 104 | for child_metric in [eval_metric, cls_metric, bbox_metric]: 105 | eval_metrics.add(child_metric) 106 | # callback 107 | batch_end_callback = callback.Speedometer(train_data.batch_size, frequent=frequent) 108 | epoch_end_callback = [mx.callback.module_checkpoint(mod, prefix, period=1, save_optimizer_states=True), 109 | callback.do_checkpoint(prefix, means, stds)] 110 | # decide learning rate 111 | base_lr = lr 112 | lr_factor = cfg.TRAIN.lr_factor 113 | lr_epoch = [float(epoch) for epoch in lr_step.split(',')] 114 | lr_epoch_diff = [epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch] 115 | lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff))) 116 | lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff] 117 | print('lr', lr, 'lr_epoch_diff', lr_epoch_diff, 'lr_iters', lr_iters) 118 | 
lr_scheduler = WarmupMultiFactorScheduler(lr_iters, lr_factor, cfg.TRAIN.warmup, cfg.TRAIN.warmup_lr, cfg.TRAIN.warmup_step) 119 | # optimizer 120 | optimizer_params = {'momentum': cfg.TRAIN.momentum, 121 | 'wd': cfg.TRAIN.wd, 122 | 'learning_rate': lr, 123 | 'lr_scheduler': lr_scheduler, 124 | 'rescale_grad': 1.0, 125 | 'clip_gradient': None} 126 | 127 | # train 128 | 129 | if not isinstance(train_data, PrefetchingIter): 130 | train_data = PrefetchingIter(train_data) 131 | 132 | mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback, 133 | batch_end_callback=batch_end_callback, kvstore=kvstore, 134 | optimizer='sgd', optimizer_params=optimizer_params, 135 | arg_params=arg_params, aux_params=aux_params, begin_epoch=begin_epoch, num_epoch=end_epoch) 136 | 137 | -------------------------------------------------------------------------------- /fpn/operator_py/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/__init__.py -------------------------------------------------------------------------------- /fpn/operator_py/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/__init__.pyc -------------------------------------------------------------------------------- /fpn/operator_py/box_annotator_ohem.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | """ 10 | Box Annotator OHEM Operator ranks RoIs by their total (classification + bbox regression) loss and keeps only the hardest roi_per_img of them for training.
11 | """ 12 | 13 | import mxnet as mx 14 | import numpy as np 15 | from distutils.util import strtobool 16 | from config.config import config 17 | 18 | class BoxAnnotatorOHEMOperator(mx.operator.CustomOp): 19 | def __init__(self, num_classes, num_reg_classes,rm_last, roi_per_img): 20 | super(BoxAnnotatorOHEMOperator, self).__init__() 21 | self._num_classes = num_classes 22 | self._num_reg_classes = num_reg_classes 23 | self._roi_per_img = roi_per_img 24 | self._rm_last = rm_last 25 | 26 | def forward(self, is_train, req, in_data, out_data, aux): 27 | 28 | cls_score = in_data[0] 29 | bbox_pred = in_data[1] 30 | labels = in_data[2].asnumpy() 31 | bbox_targets = in_data[3] 32 | bbox_weights = in_data[4] 33 | 34 | if config.network.SOFTMAX_ENABLED: 35 | per_roi_loss_cls = mx.nd.SoftmaxActivation(cls_score) + 1e-14 36 | else: 37 | per_roi_loss_cls = cls_score + 1e-14 38 | per_roi_loss_cls = per_roi_loss_cls.asnumpy() 39 | per_roi_loss_cls = per_roi_loss_cls[np.arange(per_roi_loss_cls.shape[0], dtype='int'), labels.astype('int')] 40 | per_roi_loss_cls = -1 * np.log(per_roi_loss_cls) 41 | per_roi_loss_cls = np.reshape(per_roi_loss_cls, newshape=(-1,)) 42 | 43 | per_roi_loss_bbox = bbox_weights * mx.nd.smooth_l1((bbox_pred - bbox_targets), scalar=1.0) 44 | per_roi_loss_bbox = mx.nd.sum(per_roi_loss_bbox, axis=1).asnumpy() 45 | 46 | # changed by Leonid to make sure all the positives are in 47 | total_loss = per_roi_loss_cls + per_roi_loss_bbox 48 | # total_loss[labels>0] += 10000 49 | 50 | top_k_per_roi_loss = np.argsort(total_loss) 51 | labels_ohem = labels 52 | if self._rm_last==1: 53 | labels_ohem[np.where(labels_ohem==self._num_classes-1)]=-1 54 | 55 | labels_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = -1 56 | bbox_weights_ohem = bbox_weights.asnumpy() 57 | bbox_weights_ohem[top_k_per_roi_loss[::-1][self._roi_per_img:]] = 0 58 | 59 | labels_ohem = mx.nd.array(labels_ohem) 60 | bbox_weights_ohem = mx.nd.array(bbox_weights_ohem) 61 | 62 | for ind, val in enumerate([labels_ohem, bbox_weights_ohem]): 63 | self.assign(out_data[ind], req[ind], val) 64 | 65 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 66 | for i in range(len(in_grad)): 67 | self.assign(in_grad[i], req[i], 0) 68 | 69 | 70 | @mx.operator.register('BoxAnnotatorOHEM') 71 | class BoxAnnotatorOHEMProp(mx.operator.CustomOpProp): 72 | def __init__(self, num_classes, num_reg_classes,rm_last, roi_per_img): 73 | super(BoxAnnotatorOHEMProp, self).__init__(need_top_grad=False) 74 | self._num_classes = int(num_classes) 75 | self._num_reg_classes = int(num_reg_classes) 76 | self._roi_per_img = int(roi_per_img) 77 | self._rm_last = rm_last 78 | def list_arguments(self): 79 | return ['cls_score', 'bbox_pred', 'labels', 'bbox_targets', 'bbox_weights'] 80 | 81 | def list_outputs(self): 82 | return ['labels_ohem', 'bbox_weights_ohem'] 83 | 84 | def infer_shape(self, in_shape): 85 | labels_shape = in_shape[2] 86 | bbox_weights_shape = in_shape[4] 87 | 88 | return in_shape, \ 89 | [labels_shape, bbox_weights_shape] 90 | 91 | def create_operator(self, ctx, shapes, dtypes): 92 | return BoxAnnotatorOHEMOperator(self._num_classes, self._num_reg_classes,self._rm_last, self._roi_per_img) 93 | 94 | def declare_backward_dependency(self, out_grad, in_data, out_data): 95 | return [] 96 | -------------------------------------------------------------------------------- /fpn/operator_py/box_annotator_ohem.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/box_annotator_ohem.pyc -------------------------------------------------------------------------------- /fpn/operator_py/fpn_roi_pooling.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Haozhi Qi, Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import mxnet as mx 10 | import numpy as np 11 | from mxnet.contrib import autograd 12 | import gc 13 | 14 | 15 | class FPNROIPoolingOperator(mx.operator.CustomOp): 16 | def __init__(self, feat_strides, pooled_height, pooled_width, output_dim, with_deformable): 17 | self.pooled_height = pooled_height 18 | self.pooled_width = pooled_width 19 | self.feat_strides = feat_strides 20 | self.with_deformable = with_deformable 21 | self.output_dim = output_dim 22 | self.in_grad_hist_list = [] 23 | self.num_strides = len(self.feat_strides) 24 | self.roi_pool = [None for _ in range(self.num_strides)] 25 | self.feat_idx = [None for _ in range(self.num_strides)] 26 | 27 | def forward(self, is_train, req, in_data, out_data, aux): 28 | rois = in_data[-1].asnumpy() 29 | w = rois[:, 3] - rois[:, 1] + 1 30 | h = rois[:, 4] - rois[:, 2] + 1 31 | 32 | #Leonid fix, just in case w / h will misbehave 33 | w = np.maximum(w, 1e-7) 34 | h = np.maximum(h, 1e-7) 35 | 36 | feat_id = np.clip(np.floor(2 + np.log2(np.sqrt(w * h) / 224)), 0, len(self.feat_strides) - 1) 37 | pyramid_idx = [] 38 | 39 | rois_p = [None for _ in range(self.num_strides)] 40 | for i in range(self.num_strides): 41 | self.feat_idx[i] = np.where(feat_id == i)[0] 42 | if len(self.feat_idx[i]) == 0: 43 | # padding dummy roi 44 | rois_p[i] = np.zeros((1, 5)) 45 | pyramid_idx.append(-1) 46 | else: 47 | rois_p[i] = rois[self.feat_idx[i]] 48 | pyramid_idx.append(self.feat_idx[i]) 49 | rois_idx = np.argsort(np.hstack(pyramid_idx))[-rois.shape[0]:] 50 | 51 | if is_train: 52 | for i in range(self.num_strides): 53 | self.in_grad_hist_list.append(mx.nd.zeros_like(in_data[i])) 54 | 55 | if self.with_deformable: 56 | for i in range(self.num_strides, self.num_strides * 3): 57 | self.in_grad_hist_list.append(mx.nd.zeros_like(in_data[i])) 58 | autograd.mark_variables([in_data[i] for i in range(self.num_strides * 3)], self.in_grad_hist_list) 59 | 60 | with autograd.train_section(): 61 | for i in range(self.num_strides): 62 | roi_offset_t = mx.contrib.nd.DeformablePSROIPooling(data=in_data[i], rois=mx.nd.array(rois_p[i], in_data[i].context), group_size=1, pooled_size=7, 63 | sample_per_part=4, no_trans=True, part_size=7, output_dim=256, spatial_scale=1.0 / self.feat_strides[i]) 64 | roi_offset = mx.nd.FullyConnected(data=roi_offset_t, num_hidden=7 * 7 * 2, weight=in_data[i * 2 + self.num_strides], bias=in_data[i * 2 + 1 + self.num_strides]) 65 | roi_offset_reshape = mx.nd.reshape(data=roi_offset, shape=(-1, 2, 7, 7)) 66 | self.roi_pool[i] = mx.contrib.nd.DeformablePSROIPooling(data=in_data[i], rois=mx.nd.array(rois_p[i], in_data[i].context), trans=roi_offset_reshape, 67 | group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, 68 | output_dim=self.output_dim, spatial_scale=1.0 / self.feat_strides[i], trans_std=0.1) 69 | else: 70 | autograd.mark_variables([in_data[i] for i in 
range(self.num_strides)], self.in_grad_hist_list) 71 | with autograd.train_section(): 72 | for i in range(self.num_strides): 73 | self.roi_pool[i] = mx.nd.ROIPooling(in_data[i], mx.nd.array(rois_p[i], in_data[i].context), (7, 7), spatial_scale=1.0 / self.feat_strides[i]) 74 | roi_pool = mx.nd.concatenate(self.roi_pool, axis=0) 75 | else: 76 | # during testing, there is no need to record variable, thus saving memory 77 | roi_pool = [None for _ in range(self.num_strides)] 78 | if self.with_deformable: 79 | for i in range(self.num_strides): 80 | roi_offset_t = mx.contrib.nd.DeformablePSROIPooling(data=in_data[i], rois=mx.nd.array(rois_p[i], in_data[i].context), group_size=1, pooled_size=7, 81 | sample_per_part=4, no_trans=True, part_size=7, output_dim=256, spatial_scale=1.0 / self.feat_strides[i]) 82 | roi_offset = mx.nd.FullyConnected(data=roi_offset_t, num_hidden=7 * 7 * 2, weight=in_data[i * 2 + self.num_strides], bias=in_data[i * 2 + 1 + self.num_strides]) 83 | roi_offset_reshape = mx.nd.reshape(data=roi_offset, shape=(-1, 2, 7, 7)) 84 | roi_pool[i] = mx.contrib.nd.DeformablePSROIPooling(data=in_data[i], rois=mx.nd.array(rois_p[i], in_data[i].context), trans=roi_offset_reshape, 85 | group_size=1, pooled_size=7, sample_per_part=4, no_trans=False, part_size=7, 86 | output_dim=self.output_dim, spatial_scale=1.0 / self.feat_strides[i], trans_std=0.1) 87 | else: 88 | for i in range(self.num_strides): 89 | roi_pool[i] = mx.nd.ROIPooling(in_data[i], mx.nd.array(rois_p[i], in_data[i].context), (7, 7), spatial_scale=1.0 / self.feat_strides[i]) 90 | 91 | roi_pool = mx.nd.concatenate(roi_pool, axis=0) 92 | 93 | roi_pool = mx.nd.take(roi_pool, mx.nd.array(rois_idx, roi_pool.context)) 94 | self.assign(out_data[0], req[0], roi_pool) 95 | 96 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 97 | for i in range(len(in_grad)): 98 | self.assign(in_grad[i], req[i], 0) 99 | 100 | with autograd.train_section(): 101 | for i in range(self.num_strides): 102 | if len(self.feat_idx[i] > 0): 103 | autograd.compute_gradient([mx.nd.take(out_grad[0], mx.nd.array(self.feat_idx[i], out_grad[0].context)) * self.roi_pool[i]]) 104 | 105 | if self.with_deformable: 106 | for i in range(0, self.num_strides * 3): 107 | self.assign(in_grad[i], req[i], self.in_grad_hist_list[i]) 108 | else: 109 | for i in range(0, self.num_strides): 110 | self.assign(in_grad[i], req[i], self.in_grad_hist_list[i]) 111 | 112 | gc.collect() 113 | 114 | 115 | @mx.operator.register('fpn_roi_pooling') 116 | class FPNROIPoolingProp(mx.operator.CustomOpProp): 117 | def __init__(self, feat_strides='(4,8,16,32)', pooled_height='7', pooled_width='7', with_deformable='False', output_dim='256'): 118 | super(FPNROIPoolingProp, self).__init__(need_top_grad=True) 119 | self.pooled_height = int(pooled_height) 120 | self.pooled_width = int(pooled_width) 121 | self.feat_strides = np.fromstring(feat_strides[1:-1], dtype=int, sep=',') 122 | self.with_deformable = with_deformable == 'True' 123 | self.output_dim = int(output_dim) 124 | 125 | self.num_strides = len(self.feat_strides) 126 | 127 | def list_arguments(self): 128 | args_list = [] 129 | for i in range(self.num_strides): 130 | args_list.append('data_p{}'.format(2 + i)) 131 | if self.with_deformable: 132 | for i in range(self.num_strides): 133 | args_list.extend(['offset_weight_p{}'.format(2 + i), 'offset_bias_p{}'.format(2 + i)]) 134 | args_list.append('rois') 135 | return args_list 136 | 137 | def list_outputs(self): 138 | return ['output'] 139 | 140 | def infer_shape(self, in_shape): 141 
| output_feat_shape = [in_shape[-1][0], in_shape[0][1], self.pooled_height, self.pooled_width] 142 | if self.with_deformable: 143 | offset_dim = self.pooled_height * self.pooled_width * 2 144 | input_dim = self.pooled_height * self.pooled_width * self.output_dim 145 | for i in range(self.num_strides): 146 | in_shape[i * 2 + self.num_strides], in_shape[i * 2 + 1 + self.num_strides] = [offset_dim, input_dim], [offset_dim, ] 147 | return in_shape, [output_feat_shape] 148 | 149 | def create_operator(self, ctx, shapes, dtypes): 150 | return FPNROIPoolingOperator(self.feat_strides, self.pooled_height, self.pooled_width, self.output_dim, self.with_deformable) 151 | 152 | def declare_backward_dependency(self, out_grad, in_data, out_data): 153 | return [out_grad[0]] 154 | -------------------------------------------------------------------------------- /fpn/operator_py/fpn_roi_pooling.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/fpn_roi_pooling.pyc -------------------------------------------------------------------------------- /fpn/operator_py/predict_rois_deltas.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------ 2 | # 3D transform of image points 3 | # implemented for JES data 4 | # CVAR, IBM Research AI, Haifa 5 | # May 2019 6 | #------------------------------------------ 7 | 8 | import mxnet as mx 9 | import numpy as np 10 | from mxnet.contrib import autograd 11 | import gc 12 | 13 | 14 | class PredRoisDeltasOperator(mx.operator.CustomOp): 15 | def __init__(self, feat_strides, pooled_height, pooled_width, output_dim): 16 | self.pooled_height = pooled_height 17 | self.pooled_width = pooled_width 18 | self.feat_strides = feat_strides 19 | self.output_dim = output_dim 20 | self.in_grad_hist_list = [] 21 | self.num_strides = len(self.feat_strides) 22 | self.roi_pool = [None for _ in range(self.num_strides)] 23 | self.feat_idx = [None for _ in range(self.num_strides)] 24 | 25 | def forward(self, is_train, req, in_data, out_data, aux): 26 | rois = in_data[-1].asnumpy() 27 | w = rois[:, 3] - rois[:, 1] + 1 28 | h = rois[:, 4] - rois[:, 2] + 1 29 | 30 | #Leonid fix, just in case w / h will misbehave 31 | w = np.maximum(w, 1e-7) 32 | h = np.maximum(h, 1e-7) 33 | 34 | feat_id = np.clip(np.floor(2 + np.log2(np.sqrt(w * h) / 224)), 0, len(self.feat_strides) - 1) 35 | pyramid_idx = [] 36 | 37 | rois_p = [None for _ in range(self.num_strides)] 38 | for i in range(self.num_strides): 39 | self.feat_idx[i] = np.where(feat_id == i)[0] 40 | if len(self.feat_idx[i]) == 0: 41 | # padding dummy roi 42 | rois_p[i] = np.zeros((1, 5)) 43 | pyramid_idx.append(-1) 44 | else: 45 | rois_p[i] = rois[self.feat_idx[i]] 46 | pyramid_idx.append(self.feat_idx[i]) 47 | rois_idx = np.argsort(np.hstack(pyramid_idx))[-rois.shape[0]:] 48 | 49 | if is_train: 50 | for i in range(self.num_strides): 51 | self.in_grad_hist_list.append(mx.nd.zeros_like(in_data[i])) 52 | 53 | autograd.mark_variables([in_data[i] for i in range(self.num_strides)], self.in_grad_hist_list) 54 | with autograd.train_section(): 55 | for i in range(self.num_strides): 56 | self.roi_pool[i] = mx.nd.ROIPooling(in_data[i], mx.nd.array(rois_p[i], in_data[i].context), (7, 7), 57 | spatial_scale=1.0 / self.feat_strides[i]) 58 |
suffix, 61 | weight=self.shared_param_dict['rpn_conv_weight'], 62 | bias=self.shared_param_dict['rpn_conv_bias']) 63 | 64 | 65 | else: 66 | roi_pool = [None for _ in range(self.num_strides)] 67 | 68 | for i in range(self.num_strides): 69 | roi_pool[i] = mx.nd.ROIPooling(in_data[i], mx.nd.array(rois_p[i], in_data[i].context), (7, 7), 70 | spatial_scale=1.0 / self.feat_strides[i]) 71 | 72 | roi_pool = mx.nd.take(roi_pool, mx.nd.array(rois_idx, roi_pool.context)) 73 | self.assign(out_data[0], req[0], roi_pool) 74 | 75 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 76 | for i in range(len(in_grad)): 77 | self.assign(in_grad[i], req[i], 0) 78 | 79 | with autograd.train_section(): 80 | for i in range(self.num_strides): 81 | if len(self.feat_idx[i] > 0): 82 | autograd.compute_gradient([mx.nd.take(out_grad[0], mx.nd.array(self.feat_idx[i], out_grad[0].context)) * self.roi_pool[i]]) 83 | 84 | for i in range(0, self.num_strides): 85 | self.assign(in_grad[i], req[i], self.in_grad_hist_list[i]) 86 | 87 | gc.collect() 88 | 89 | @mx.operator.register('predict_rois_deltas') 90 | class PredRoisDeltasProp(mx.operator.CustomOpProp): 91 | def __init__(self, feat_strides='(4,8,16,32)', pooled_height='7', pooled_width='7', output_dim='256'): 92 | super(PredRoisDeltasProp, self).__init__(need_top_grad=True) 93 | self.pooled_height = int(pooled_height) 94 | self.pooled_width = int(pooled_width) 95 | self.feat_strides = np.fromstring(feat_strides[1:-1], dtype=int, sep=',') 96 | 97 | self.output_dim = int(output_dim) 98 | 99 | self.num_strides = len(self.feat_strides) 100 | 101 | def list_arguments(self): 102 | args_list = [] 103 | for i in range(self.num_strides): 104 | args_list.append('data_p{}'.format(2 + i)) 105 | args_list.append('rois') 106 | return args_list 107 | 108 | def list_outputs(self): 109 | return ['output'] 110 | 111 | def infer_shape(self, in_shape): 112 | output_feat_shape = [in_shape[-1][0], in_shape[0][1], self.pooled_height, self.pooled_width] 113 | return in_shape, [output_feat_shape] 114 | 115 | 116 | def create_operator(self, ctx, shapes, dtypes): 117 | return PredRoisDeltasOperator(self.feat_strides, self.pooled_height, self.pooled_width, self.output_dim, self.with_deformable) 118 | 119 | def declare_backward_dependency(self, out_grad, in_data, out_data): 120 | return [out_grad[0]] 121 | -------------------------------------------------------------------------------- /fpn/operator_py/proposal_target.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified by Yuwen Xiong 8 | # -------------------------------------------------------- 9 | 10 | """ 11 | Proposal Target Operator selects foreground and background roi and assigns label, bbox_transform to them. 
12 | """ 13 | 14 | import mxnet as mx 15 | import numpy as np 16 | from distutils.util import strtobool 17 | from easydict import EasyDict as edict 18 | import cPickle 19 | 20 | 21 | from core.rcnn import sample_rois 22 | 23 | DEBUG = False 24 | 25 | 26 | class ProposalTargetOperator(mx.operator.CustomOp): 27 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction): 28 | super(ProposalTargetOperator, self).__init__() 29 | self._num_classes = num_classes 30 | self._batch_images = batch_images 31 | self._batch_rois = batch_rois 32 | self._cfg = cfg 33 | self._fg_fraction = fg_fraction 34 | 35 | if DEBUG: 36 | self._count = 0 37 | self._fg_num = 0 38 | self._bg_num = 0 39 | 40 | def forward(self, is_train, req, in_data, out_data, aux): 41 | assert self._batch_rois == -1 or self._batch_rois % self._batch_images == 0, \ 42 | 'batchimages {} must devide batch_rois {}'.format(self._batch_images, self._batch_rois) 43 | all_rois = in_data[0].asnumpy() 44 | gt_boxes = in_data[1].asnumpy() 45 | 46 | if self._batch_rois == -1: 47 | rois_per_image = all_rois.shape[0] + gt_boxes.shape[0] 48 | fg_rois_per_image = rois_per_image 49 | else: 50 | rois_per_image = self._batch_rois / self._batch_images 51 | fg_rois_per_image = np.round(self._fg_fraction * rois_per_image).astype(int) 52 | 53 | # Include ground-truth boxes in the set of candidate rois 54 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 55 | all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1])))) 56 | # Sanity check: single batch only 57 | assert np.all(all_rois[:, 0] == 0), 'Only single item batches are supported' 58 | 59 | rois, labels, neg_labels, bbox_targets, bbox_weights = \ 60 | sample_rois(all_rois, fg_rois_per_image, rois_per_image, self._num_classes, self._cfg, gt_boxes=gt_boxes) 61 | 62 | if DEBUG: 63 | print "labels=", labels 64 | print 'num fg: {}'.format((labels > 0).sum()) 65 | print 'num bg: {}'.format((labels == 0).sum()) 66 | self._count += 1 67 | self._fg_num += (labels > 0).sum() 68 | self._bg_num += (labels == 0).sum() 69 | print "self._count=", self._count 70 | print 'num fg avg: {}'.format(self._fg_num / self._count) 71 | print 'num bg avg: {}'.format(self._bg_num / self._count) 72 | print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)) 73 | 74 | for ind, val in enumerate([rois, labels, neg_labels, bbox_targets, bbox_weights]): 75 | self.assign(out_data[ind], req[ind], val) 76 | 77 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 78 | for i in range(len(in_grad)): 79 | self.assign(in_grad[i], req[i], 0) 80 | 81 | 82 | @mx.operator.register('proposal_target') 83 | class ProposalTargetProp(mx.operator.CustomOpProp): 84 | def __init__(self, num_classes, batch_images, batch_rois, cfg, fg_fraction='0.25'): 85 | super(ProposalTargetProp, self).__init__(need_top_grad=False) 86 | self._num_classes = int(num_classes) 87 | self._batch_images = int(batch_images) 88 | self._batch_rois = int(batch_rois) 89 | self._cfg = cPickle.loads(cfg) 90 | self._fg_fraction = float(fg_fraction) 91 | 92 | def list_arguments(self): 93 | return ['rois', 'gt_boxes'] 94 | 95 | def list_outputs(self): 96 | return ['rois_output', 'label', 'neg_label', 'bbox_target', 'bbox_weight'] 97 | 98 | def infer_shape(self, in_shape): 99 | rpn_rois_shape = in_shape[0] 100 | gt_boxes_shape = in_shape[1] 101 | 102 | rois = rpn_rois_shape[0] + gt_boxes_shape[0] if self._batch_rois == -1 else self._batch_rois 103 | 104 | output_rois_shape = (rois, 5) 105 | label_shape = (rois, ) 
106 | neg_label_shape = (rois, ) 107 | bbox_target_shape = (rois, self._num_classes * 4) 108 | bbox_weight_shape = (rois, self._num_classes * 4) 109 | 110 | return [rpn_rois_shape, gt_boxes_shape], \ 111 | [output_rois_shape, label_shape, neg_label_shape, bbox_target_shape, bbox_weight_shape] 112 | 113 | def create_operator(self, ctx, shapes, dtypes): 114 | return ProposalTargetOperator(self._num_classes, self._batch_images, self._batch_rois, self._cfg, self._fg_fraction) 115 | 116 | def declare_backward_dependency(self, out_grad, in_data, out_data): 117 | return [] 118 | -------------------------------------------------------------------------------- /fpn/operator_py/proposal_target.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/proposal_target.pyc -------------------------------------------------------------------------------- /fpn/operator_py/pyramid_proposal.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/operator_py/pyramid_proposal.pyc -------------------------------------------------------------------------------- /fpn/symbols/__init__.py: -------------------------------------------------------------------------------- 1 | import resnet_v1_101_fpn_rcnn 2 | import resnet_v1_101_fpn_dcn_rcnn 3 | import resnet_v1_101_fpn_dcn_rcnn_oneshot_v3 4 | import resnet_v1_101_fpn_dcn_rcnn_rep_noemb -------------------------------------------------------------------------------- /fpn/symbols/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/symbols/__init__.pyc -------------------------------------------------------------------------------- /fpn/symbols/resnet_v1_101_fpn_dcn_rcnn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/symbols/resnet_v1_101_fpn_dcn_rcnn.pyc -------------------------------------------------------------------------------- /fpn/symbols/resnet_v1_101_fpn_dcn_rcnn_oneshot_v3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/symbols/resnet_v1_101_fpn_dcn_rcnn_oneshot_v3.pyc -------------------------------------------------------------------------------- /fpn/symbols/resnet_v1_101_fpn_dcn_rcnn_rep_noemb.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/symbols/resnet_v1_101_fpn_dcn_rcnn_rep_noemb.pyc -------------------------------------------------------------------------------- /fpn/symbols/resnet_v1_101_fpn_rcnn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/symbols/resnet_v1_101_fpn_rcnn.pyc -------------------------------------------------------------------------------- /fpn/test.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 
Deformable Convolutional Networks
3 | # Copyright (c) 2016 by Contributors
4 | # Copyright (c) 2017 Microsoft
5 | # Licensed under The Apache-2.0 License [see LICENSE for details]
6 | # Modified by Haozhi Qi
7 | # --------------------------------------------------------
8 | 
9 | import _init_paths
10 | 
11 | #import cv2
12 | import argparse
13 | import os
14 | import sys
15 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
16 | from config.config import config, update_config
17 | 
18 | 
19 | def parse_args():
20 |     parser = argparse.ArgumentParser(description='Test a Faster R-CNN network')
21 |     # general
22 |     parser.add_argument('--cfg', help='experiment configure file name', required=True, type=str)
23 | 
24 |     args, rest = parser.parse_known_args()
25 |     update_config(args.cfg)
26 | 
27 |     # rcnn
28 |     parser.add_argument('--vis', help='turn on visualization', action='store_true')
29 |     parser.add_argument('--ignore_cache', help='ignore cached results boxes', action='store_true')
30 |     parser.add_argument('--thresh', help='valid detection threshold', default=1e-3, type=float)
31 |     parser.add_argument('--shuffle', help='shuffle data on visualization', action='store_true')
32 |     parser.add_argument('--debug', default=0, help='debug mode', required=False, type=int)
33 |     parser.add_argument('--is_docker', help='test in docker mode', action='store_true')
34 | 
35 |     args = parser.parse_args()
36 |     return args
37 | 
38 | args = parse_args()
39 | curr_path = os.path.abspath(os.path.dirname(__file__))
40 | sys.path.insert(0, os.path.join(curr_path, '../external/mxnet', config.MXNET_VERSION))
41 | 
42 | import mxnet as mx
43 | from function.test_rcnn import test_rcnn
44 | from utils.create_logger import create_logger
45 | from nms.nms import gpu_nms_wrapper
46 | 
47 | def main():
48 |     #ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
49 |     ctx = [mx.gpu(0), mx.gpu(1), mx.gpu(2), mx.gpu(3)]
50 |     print args
51 |     #gpu_nums = [int(i) for i in config.gpus.split(',')]
52 |     gpu_nums = [0, 1, 2, 3]
53 |     nms_dets = gpu_nms_wrapper(config.TEST.NMS, gpu_nums[0])
54 |     logger, final_output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)
55 |     output_path = os.path.join(final_output_path, '..', '+'.join([iset for iset in config.dataset.image_set.split('+')]), config.TRAIN.model_prefix)
56 |     test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set, config.dataset.root_path, config.dataset.dataset_path,
57 |               ctx, output_path, config.TEST.test_epoch, args.vis, args.ignore_cache, args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal,
58 |               args.thresh, logger=logger, output_path=final_output_path, nms_dets=nms_dets, is_docker=args.is_docker)
59 | 
60 | if __name__ == '__main__':
61 |     main()
62 | 
--------------------------------------------------------------------------------
/fpn/test.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/test.pyc
--------------------------------------------------------------------------------
/fpn/train_end2end.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/fpn/train_end2end.pyc
--------------------------------------------------------------------------------
/lib/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	cd nms/; 
python setup.py build_ext --inplace; rm -rf build; cd ../../ 3 | cd bbox/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 4 | cd dataset/pycocotools/; python setup.py build_ext --inplace; rm -rf build; cd ../../ 5 | clean: 6 | cd nms/; rm *.so *.c *.cpp; cd ../../ 7 | cd bbox/; rm *.so *.c *.cpp; cd ../../ 8 | cd dataset/pycocotools/; rm *.so; cd ../../ 9 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/__init__.py -------------------------------------------------------------------------------- /lib/bbox/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp -------------------------------------------------------------------------------- /lib/bbox/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/__init__.py -------------------------------------------------------------------------------- /lib/bbox/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/__init__.pyc -------------------------------------------------------------------------------- /lib/bbox/bbox.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/bbox.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/bbox/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Sergey Karayev 7 | # Modified by Yuwen Xiong, from from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | #cython: language_level=2 10 | 11 | cimport cython 12 | import numpy as np 13 | cimport numpy as np 14 | 15 | DTYPE = np.float 16 | ctypedef np.float_t DTYPE_t 17 | 18 | def bbox_overlaps_cython( 19 | np.ndarray[DTYPE_t, ndim=2] boxes, 20 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 21 | """ 22 | Parameters 23 | ---------- 24 | boxes: (N, 4) ndarray of float 25 | query_boxes: (K, 4) ndarray of float 26 | Returns 27 | ------- 28 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 29 | """ 30 | cdef unsigned int N = boxes.shape[0] 31 | cdef unsigned int K = query_boxes.shape[0] 32 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 33 | cdef DTYPE_t iw, ih, box_area 34 | cdef DTYPE_t ua 35 | cdef unsigned int k, n 36 | for k in range(K): 37 | box_area = ( 38 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 39 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 40 | ) 41 | for n in range(N): 42 | iw = ( 43 | min(boxes[n, 2], query_boxes[k, 2]) - 44 | max(boxes[n, 0], query_boxes[k, 0]) + 1 45 | ) 46 | if iw > 0: 47 | ih = ( 48 | min(boxes[n, 3], query_boxes[k, 
3]) - 49 | max(boxes[n, 1], query_boxes[k, 1]) + 1 50 | ) 51 | if ih > 0: 52 | ua = float( 53 | (boxes[n, 2] - boxes[n, 0] + 1) * 54 | (boxes[n, 3] - boxes[n, 1] + 1) + 55 | box_area - iw * ih 56 | ) 57 | overlaps[n, k] = iw * ih / ua 58 | return overlaps 59 | -------------------------------------------------------------------------------- /lib/bbox/bbox.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/bbox.so -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified by Yuwen Xiong, from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | 11 | """ 12 | This file has functions about generating bounding box regression targets 13 | """ 14 | 15 | import numpy as np 16 | 17 | from bbox_transform import bbox_overlaps, bbox_transform 18 | 19 | 20 | def compute_bbox_regression_targets(rois, overlaps, labels, cfg): 21 | """ 22 | given rois, overlaps, gt labels, compute bounding box regression targets 23 | :param rois: roidb[i]['boxes'] k * 4 24 | :param overlaps: roidb[i]['max_overlaps'] k * 1 25 | :param labels: roidb[i]['max_classes'] k * 1 26 | :return: targets[i][class, dx, dy, dw, dh] k * 5 27 | """ 28 | # Ensure ROIs are floats 29 | rois = rois.astype(np.float, copy=False) 30 | 31 | # Sanity check 32 | if len(rois) != len(overlaps): 33 | print 'bbox regression: this should not happen' 34 | 35 | # Indices of ground-truth ROIs 36 | gt_inds = np.where(overlaps == 1)[0] 37 | if len(gt_inds) == 0: 38 | print 'something wrong : zero ground truth rois' 39 | # Indices of examples for which we try to make predictions 40 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_REGRESSION_THRESH)[0] 41 | 42 | # Get IoU overlap between each ex ROI and gt ROI 43 | ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :]) 44 | 45 | # Find which gt ROI each ex ROI has max overlap with: 46 | # this will be the ex ROI's gt target 47 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 48 | gt_rois = rois[gt_inds[gt_assignment], :] 49 | ex_rois = rois[ex_inds, :] 50 | 51 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 52 | targets[ex_inds, 0] = labels[ex_inds] 53 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 54 | return targets 55 | 56 | 57 | def add_bbox_regression_targets(roidb, cfg): 58 | """ 59 | given roidb, add ['bbox_targets'] and normalize bounding box regression targets 60 | :param roidb: roidb to be processed. 
must have gone through imdb.prepare_roidb
61 |     :return: means, std variances of targets
62 |     """
63 |     print 'add bounding box regression targets'
64 |     assert len(roidb) > 0
65 |     assert 'max_classes' in roidb[0]
66 | 
67 |     num_images = len(roidb)
68 |     num_classes = 2 if cfg.CLASS_AGNOSTIC else roidb[0]['gt_overlaps'].shape[1]
69 | 
70 |     for im_i in range(num_images):
71 |         rois = roidb[im_i]['boxes']
72 |         max_overlaps = roidb[im_i]['max_overlaps']
73 |         max_classes = roidb[im_i]['max_classes']
74 |         roidb[im_i]['bbox_targets'] = compute_bbox_regression_targets(rois, max_overlaps, max_classes, cfg)
75 | 
76 |     if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
77 |         # use fixed / precomputed means and stds instead of empirical values
78 |         means = np.tile(np.array(cfg.TRAIN.BBOX_MEANS), (num_classes, 1))
79 |         stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS), (num_classes, 1))
80 |     else:
81 |         # compute mean, std values
82 |         class_counts = np.zeros((num_classes, 1)) + 1e-14
83 |         sums = np.zeros((num_classes, 4))
84 |         squared_sums = np.zeros((num_classes, 4))
85 |         for im_i in range(num_images):
86 |             targets = roidb[im_i]['bbox_targets']
87 |             for cls in range(1, num_classes):
88 |                 cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0]
89 |                 if cls_indexes.size > 0:
90 |                     class_counts[cls] += cls_indexes.size
91 |                     sums[cls, :] += targets[cls_indexes, 1:].sum(axis=0)
92 |                     squared_sums[cls, :] += (targets[cls_indexes, 1:] ** 2).sum(axis=0)
93 | 
94 |         means = sums / class_counts
95 |         # var(x) = E(x^2) - E(x)^2
96 |         stds = np.sqrt(squared_sums / class_counts - means ** 2)
97 | 
98 |     print 'bbox target means:'
99 |     print means
100 |     print means[1:, :].mean(axis=0)  # ignore bg class
101 |     print 'bbox target stdevs:'
102 |     print stds
103 |     print stds[1:, :].mean(axis=0)  # ignore bg class
104 | 
105 | 
106 |     # normalize targets
107 |     for im_i in range(num_images):
108 |         targets = roidb[im_i]['bbox_targets']
109 |         for cls in range(1, num_classes):
110 |             cls_indexes = np.where(targets[:, 0] > 0)[0] if cfg.CLASS_AGNOSTIC else np.where(targets[:, 0] == cls)[0]
111 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] -= means[cls, :]
112 |             roidb[im_i]['bbox_targets'][cls_indexes, 1:] /= stds[cls, :]
113 | 
114 |     return means.ravel(), stds.ravel()
115 | 
116 | 
117 | def expand_bbox_regression_targets(bbox_targets_data, num_classes, cfg):
118 |     """
119 |     expand from 5 to 4 * num_classes; only the right class has non-zero bbox regression targets
120 |     :param bbox_targets_data: [k * 5]
121 |     :param num_classes: number of classes
122 |     :return: bbox target processed [k * 4 num_classes]
123 |     bbox_weights ! only foreground boxes have bbox regression computation!
124 | """ 125 | classes = bbox_targets_data[:, 0] 126 | if cfg.CLASS_AGNOSTIC: 127 | num_classes = 2 128 | bbox_targets = np.zeros((classes.size, 4 * num_classes), dtype=np.float32) 129 | bbox_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 130 | indexes = np.where(classes > 0)[0] 131 | for index in indexes: 132 | cls = classes[index] 133 | start = int(4 * 1 if cls > 0 else 0) if cfg.CLASS_AGNOSTIC else int(4 * cls) 134 | end = start + 4 135 | bbox_targets[index, start:end] = bbox_targets_data[index, 1:] 136 | bbox_weights[index, start:end] = cfg.TRAIN.BBOX_WEIGHTS 137 | return bbox_targets, bbox_weights 138 | 139 | -------------------------------------------------------------------------------- /lib/bbox/bbox_regression.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/bbox_regression.pyc -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from bbox import bbox_overlaps_cython 3 | 4 | 5 | def bbox_overlaps(boxes, query_boxes): 6 | return bbox_overlaps_cython(boxes, query_boxes) 7 | 8 | 9 | def bbox_overlaps_py(boxes, query_boxes): 10 | """ 11 | determine overlaps between boxes and query_boxes 12 | :param boxes: n * 4 bounding boxes 13 | :param query_boxes: k * 4 bounding boxes 14 | :return: overlaps: n * k overlaps 15 | """ 16 | n_ = boxes.shape[0] 17 | k_ = query_boxes.shape[0] 18 | overlaps = np.zeros((n_, k_), dtype=np.float) 19 | for k in range(k_): 20 | query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) 21 | for n in range(n_): 22 | iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 23 | if iw > 0: 24 | ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 25 | if ih > 0: 26 | box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) 27 | all_area = float(box_area + query_box_area - iw * ih) 28 | overlaps[n, k] = iw * ih / all_area 29 | return overlaps 30 | 31 | 32 | def clip_boxes(boxes, im_shape): 33 | """ 34 | Clip boxes to image boundaries. 35 | :param boxes: [N, 4* num_classes] 36 | :param im_shape: tuple of 2 37 | :return: [N, 4* num_classes] 38 | """ 39 | # x1 >= 0 40 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 41 | # y1 >= 0 42 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 43 | # x2 < im_shape[1] 44 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 45 | # y2 < im_shape[0] 46 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 47 | return boxes 48 | 49 | def filter_boxes(boxes, min_size): 50 | """ 51 | filter small boxes. 
52 | :param boxes: [N, 4* num_classes] 53 | :param min_size: 54 | :return: keep: 55 | """ 56 | ws = boxes[:, 2] - boxes[:, 0] + 1 57 | hs = boxes[:, 3] - boxes[:, 1] + 1 58 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 59 | return keep 60 | 61 | def nonlinear_transform(ex_rois, gt_rois): 62 | """ 63 | compute bounding box regression targets from ex_rois to gt_rois 64 | :param ex_rois: [N, 4] 65 | :param gt_rois: [N, 4] 66 | :return: [N, 4] 67 | """ 68 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 69 | 70 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 71 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 72 | ex_ctr_x = ex_rois[:, 0] + 0.5 * (ex_widths - 1.0) 73 | ex_ctr_y = ex_rois[:, 1] + 0.5 * (ex_heights - 1.0) 74 | 75 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 76 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 77 | gt_ctr_x = gt_rois[:, 0] + 0.5 * (gt_widths - 1.0) 78 | gt_ctr_y = gt_rois[:, 1] + 0.5 * (gt_heights - 1.0) 79 | 80 | targets_dx = (gt_ctr_x - ex_ctr_x) / (ex_widths + 1e-14) 81 | targets_dy = (gt_ctr_y - ex_ctr_y) / (ex_heights + 1e-14) 82 | # leonid: added "+ 1e-14" 83 | targets_dw = np.log(gt_widths / (ex_widths + 1e-14)) 84 | targets_dh = np.log(gt_heights / (ex_heights + 1e-14)) 85 | 86 | # leonid: adding these tests for debugging of "RuntimeWarning: invalid value encountered in log" 87 | if np.any(gt_widths<=0) or np.any(gt_heights<=0) or np.any(ex_widths<=0) or np.any(ex_heights<=0): 88 | print('Bad values: gt_widths={0}, gt_heights={1}, ex_widths={2}, ex_heights={3}'.format( 89 | np.any(gt_widths <= 0), 90 | np.any(gt_heights <= 0), 91 | np.any(ex_widths <= 0), 92 | np.any(ex_heights <= 0) 93 | )) 94 | 95 | targets = np.vstack( 96 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 97 | return targets 98 | 99 | 100 | def nonlinear_pred(boxes, box_deltas): 101 | """ 102 | Transform the set of class-agnostic boxes into class-specific boxes 103 | by applying the predicted offsets (box_deltas) 104 | :param boxes: !important [N 4] 105 | :param box_deltas: [N, 4 * num_classes] 106 | :return: [N 4 * num_classes] 107 | """ 108 | if boxes.shape[0] == 0: 109 | return np.zeros((0, box_deltas.shape[1])) 110 | 111 | boxes = boxes.astype(np.float, copy=False) 112 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 113 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 114 | ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0) 115 | ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0) 116 | 117 | dx = box_deltas[:, 0::4] 118 | dy = box_deltas[:, 1::4] 119 | dw = box_deltas[:, 2::4] 120 | dh = box_deltas[:, 3::4] 121 | 122 | # Leonid: adding protection against overflow 123 | dw = np.minimum(dw, 10) 124 | dh = np.minimum(dh, 10) 125 | 126 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 127 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 128 | pred_w = np.exp(dw) * widths[:, np.newaxis] 129 | pred_h = np.exp(dh) * heights[:, np.newaxis] 130 | 131 | pred_boxes = np.zeros(box_deltas.shape) 132 | # x1 133 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0) 134 | # y1 135 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0) 136 | # x2 137 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0) 138 | # y2 139 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0) 140 | 141 | return pred_boxes 142 | 143 | 144 | def iou_transform(ex_rois, gt_rois): 145 | """ return bbox targets, IoU loss uses gt_rois as gt """ 146 | assert ex_rois.shape[0] == gt_rois.shape[0], 'inconsistent rois number' 147 | return gt_rois 148 
| 149 | 150 | def iou_pred(boxes, box_deltas): 151 | """ 152 | Transform the set of class-agnostic boxes into class-specific boxes 153 | by applying the predicted offsets (box_deltas) 154 | :param boxes: !important [N 4] 155 | :param box_deltas: [N, 4 * num_classes] 156 | :return: [N 4 * num_classes] 157 | """ 158 | if boxes.shape[0] == 0: 159 | return np.zeros((0, box_deltas.shape[1])) 160 | 161 | boxes = boxes.astype(np.float, copy=False) 162 | x1 = boxes[:, 0] 163 | y1 = boxes[:, 1] 164 | x2 = boxes[:, 2] 165 | y2 = boxes[:, 3] 166 | 167 | dx1 = box_deltas[:, 0::4] 168 | dy1 = box_deltas[:, 1::4] 169 | dx2 = box_deltas[:, 2::4] 170 | dy2 = box_deltas[:, 3::4] 171 | 172 | pred_boxes = np.zeros(box_deltas.shape) 173 | # x1 174 | pred_boxes[:, 0::4] = dx1 + x1[:, np.newaxis] 175 | # y1 176 | pred_boxes[:, 1::4] = dy1 + y1[:, np.newaxis] 177 | # x2 178 | pred_boxes[:, 2::4] = dx2 + x2[:, np.newaxis] 179 | # y2 180 | pred_boxes[:, 3::4] = dy2 + y2[:, np.newaxis] 181 | 182 | return pred_boxes 183 | 184 | 185 | # define bbox_transform and bbox_pred 186 | bbox_transform = nonlinear_transform 187 | bbox_pred = nonlinear_pred 188 | -------------------------------------------------------------------------------- /lib/bbox/bbox_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/bbox_transform.pyc -------------------------------------------------------------------------------- /lib/bbox/bbox_utils.py: -------------------------------------------------------------------------------- 1 | def bb_overlap(bb_array, bb_GT): 2 | import numpy as np 3 | 4 | # bb_GT: [Ninstances,4] matrix 5 | # bb_array: [Nrois, 4] 6 | # for every row 7 | if bb_GT.ndim == 1: 8 | bb_GT = np.expand_dims(bb_GT,0) 9 | overlaps = np.zeros((bb_array.shape[0], bb_GT.shape[0]), np.float32) # [Nrois, Ninstances] 10 | for i,GT in enumerate(bb_GT): # go over rows 11 | # intersection 12 | ixmin = np.maximum(bb_array[:, 0], GT[0]) # [Nrois, 1] 13 | iymin = np.maximum(bb_array[:, 1], GT[1]) 14 | ixmax = np.minimum(bb_array[:, 2], GT[2]) 15 | iymax = np.minimum(bb_array[:, 3], GT[3]) 16 | iw = np.maximum(ixmax - ixmin + 1., 0.) 17 | ih = np.maximum(iymax - iymin + 1., 0.) 18 | inters = iw * ih # [Nrois, 1] 19 | 20 | # union 21 | uni = ((GT[2] - GT[0] + 1.) * (GT[3] - GT[1] + 1.) + 22 | (bb_array[:, 2] - bb_array[:, 0] + 1.) * 23 | (bb_array[:, 3] - bb_array[:, 1] + 1.) 
- inters) 24 | 25 | overlaps[:, i] = inters / uni 26 | 27 | return overlaps -------------------------------------------------------------------------------- /lib/bbox/bbox_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/bbox/bbox_utils.pyc -------------------------------------------------------------------------------- /lib/bbox/setup_linux.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | 11 | import os 12 | from os.path import join as pjoin 13 | from setuptools import setup 14 | from distutils.extension import Extension 15 | from Cython.Distutils import build_ext 16 | import numpy as np 17 | 18 | # Obtain the numpy include directory. This logic works across numpy versions. 19 | try: 20 | numpy_include = np.get_include() 21 | except AttributeError: 22 | numpy_include = np.get_numpy_include() 23 | 24 | 25 | def customize_compiler_for_nvcc(self): 26 | """inject deep into distutils to customize how the dispatch 27 | to gcc/nvcc works. 28 | If you subclass UnixCCompiler, it's not trivial to get your subclass 29 | injected in, and still have the right customizations (i.e. 30 | distutils.sysconfig.customize_compiler) run on it. So instead of going 31 | the OO route, I have this. Note, it's kindof like a wierd functional 32 | subclassing going on.""" 33 | 34 | # tell the compiler it can processes .cu 35 | self.src_extensions.append('.cu') 36 | 37 | # save references to the default compiler_so and _comple methods 38 | default_compiler_so = self.compiler_so 39 | super = self._compile 40 | 41 | # now redefine the _compile method. This gets executed for each 42 | # object but distutils doesn't have the ability to change compilers 43 | # based on source extension: we add it. 
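    # (note) the redefined _compile below keys off the source extension: '.cu' sources are
    # handed to nvcc (via a CUDA dict that is not defined in this file -- harmless here,
    # since this bbox extension only compiles bbox.pyx to C), while all other sources keep
    # the default compiler; per-compiler flags come from extra_postargs['nvcc'] / ['gcc'].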
44 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 45 | if os.path.splitext(src)[1] == '.cu': 46 | # use the cuda for .cu files 47 | self.set_executable('compiler_so', CUDA['nvcc']) 48 | # use only a subset of the extra_postargs, which are 1-1 translated 49 | # from the extra_compile_args in the Extension class 50 | postargs = extra_postargs['nvcc'] 51 | else: 52 | postargs = extra_postargs['gcc'] 53 | 54 | super(obj, src, ext, cc_args, postargs, pp_opts) 55 | # reset the default compiler_so, which we might have changed for cuda 56 | self.compiler_so = default_compiler_so 57 | 58 | # inject our redefined _compile method into the class 59 | self._compile = _compile 60 | 61 | 62 | # run the customize_compiler 63 | class custom_build_ext(build_ext): 64 | def build_extensions(self): 65 | customize_compiler_for_nvcc(self.compiler) 66 | build_ext.build_extensions(self) 67 | 68 | 69 | ext_modules = [ 70 | Extension( 71 | "bbox", 72 | ["bbox.pyx"], 73 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 74 | include_dirs=[numpy_include] 75 | ), 76 | ] 77 | 78 | setup( 79 | name='bbox_cython', 80 | ext_modules=ext_modules, 81 | # inject our custom trigger 82 | cmdclass={'build_ext': custom_build_ext}, 83 | ) 84 | -------------------------------------------------------------------------------- /lib/bbox/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 8 | # -------------------------------------------------------- 9 | 10 | import numpy as np 11 | import os 12 | from os.path import join as pjoin 13 | #from distutils.core import setup 14 | from setuptools import setup 15 | from distutils.extension import Extension 16 | from Cython.Distutils import build_ext 17 | import subprocess 18 | 19 | #change for windows, by MrX 20 | nvcc_bin = 'nvcc.exe' 21 | lib_dir = 'lib/x64' 22 | 23 | import distutils.msvc9compiler 24 | distutils.msvc9compiler.VERSION = 14.0 25 | 26 | # Obtain the numpy include directory. This logic works across numpy versions. 
27 | try:
28 |     numpy_include = np.get_include()
29 | except AttributeError:
30 |     numpy_include = np.get_numpy_include()
31 | 
32 | ext_modules = [
33 |     # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts
34 |     Extension(
35 |         "bbox",
36 |         sources=["bbox.pyx"],
37 |         extra_compile_args={},
38 |         include_dirs = [numpy_include]
39 |     ),
40 | ]
41 | 
42 | setup(
43 |     name='fast_rcnn',
44 |     ext_modules=ext_modules,
45 |     # inject our custom trigger
46 |     cmdclass={'build_ext': build_ext},
47 | )
48 | 
--------------------------------------------------------------------------------
/lib/dataset/Defects.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/Defects.pyc
--------------------------------------------------------------------------------
/lib/dataset/Logo.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/Logo.pyc
--------------------------------------------------------------------------------
/lib/dataset/SCENT.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------
2 | # Part of RepMet codebase
3 | # Joseph Shtok josephs@il.ibm.com, CVAR team, IBM Research AI
4 | # -----------------------------------------------------------
5 | 
6 | """
7 | JES_poc database
8 | This class loads ground-truth annotations in the SCENT XML data format
9 | and transforms them into the IMDB format. Selective search is used for proposals; see the roidb function.
10 | """
11 | 
12 | import cPickle
13 | import cv2
14 | import os
15 | import numpy as np
16 | import PIL
17 | import scipy.io as sio
18 | 
19 | from imdb import IMDB
20 | from imagenet_voc_eval import voc_eval, voc_eval_sds
21 | from ds_utils import unique_boxes, filter_small_boxes
22 | 
23 | class SCENT(IMDB):
24 |     def __init__(self, image_set, root_path, dataset_path, result_path=None, mask_size=-1,
25 |                  binary_thresh=None, categ_index_offs=0, per_category_epoch_max=0,
26 |                  classes_list_fname='SCENT_base_classes.txt',
27 |                  num_ex_per_class=''):
28 |         """
29 |         fill basic information to initialize imdb
30 |         :param image_set: poc_train, poc_val, poc_test, pilot_*  # 2007_trainval, 2007_test, etc.
31 |         :param root_path: 'selective_search_data' and 'cache'
32 |         :param dataset_path: data and results
33 |         :return: imdb object
34 |         """
35 | 
36 |         sis = image_set.split(':')  # take just the first dataset in case a sequence is passed
37 |         if len(sis) > 1:
38 |             image_set = sis[0]
39 |         self.per_category_epoch_max = per_category_epoch_max
40 |         self.root_path = root_path
41 |         self.dataset_path = dataset_path
42 |         database_csv_fname = []
43 |         if image_set[0:4] == 'base':
44 |             database_csv_fname = 'SCENT_train_db.csv'
45 |             self.dataset_type = 'base'
46 | 
47 |         self.database_csv_fname = os.path.join(dataset_path, database_csv_fname)
48 |         self.classes_list_fname = os.path.join(dataset_path, classes_list_fname)
49 | 
50 |         self.image_set = image_set
51 |         self.name = 'SCENT_' + image_set
52 |         with open(self.classes_list_fname, 'r') as fid:
53 |             self.classes = [x.strip() for x in fid.readlines()]
54 |         self.num_classes = len(self.classes)
55 |         self._class_to_ind = dict(zip(self.classes, xrange(1, self.num_classes + 1)))
56 | 
57 |         self.image_set_index = 0
58 |         self._result_path = ''  # TODO: find out why this field is needed here
59 | 
60
| def image_path_from_index(self, index): 61 | """ 62 | given image index, find out full path 63 | :param index: index of a specific image 64 | :return: full path of this image 65 | """ 66 | image_file = os.path.join(self.dataset_path, 'Data', self.image_set, index + '.JPEG') 67 | assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file) 68 | return image_file 69 | 70 | def segmentation_path_from_index(self, index): 71 | """ 72 | given image index, find out the full path of segmentation class 73 | :param index: index of a specific image 74 | :return: full path of segmentation class 75 | """ 76 | # seg_class_file = os.path.join(self.dataset_path, 'SegmentationClass', index + '.png') 77 | # assert os.path.exists(seg_class_file), 'Path does not exist: {}'.format(seg_class_file) 78 | # return seg_class_file 79 | raise NotImplementedError 80 | 81 | def gt_roidb(self): 82 | """ 83 | return ground truth image regions database 84 | :return: imdb[image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 85 | """ 86 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 87 | if os.path.exists(cache_file): 88 | with open(cache_file, 'rb') as fid: 89 | roidb = cPickle.load(fid) 90 | print '{} gt roidb loaded from {}'.format(self.name, cache_file) 91 | self.num_images = len(roidb) 92 | self.roidb = roidb 93 | return roidb 94 | 95 | 96 | # produce roidb --------------------------------------------------------------- 162 | 163 | # gt_roidb = [] 164 | # for ii, index in enumerate(self.image_set_index): 165 | # if (ii % 1000) == 0: 166 | # print('Processing image {0} of {1}'.format(ii,len(self.image_set_index))) 167 | # gt_roidb.append(self.load_imagenet_annotation(index)) 168 | with open(cache_file, 'wb') as fid: 169 | cPickle.dump(self.roidb, fid, cPickle.HIGHEST_PROTOCOL) # gt_roidb 170 | print 'wrote gt roidb to {}'.format(cache_file) 171 | return self.roidb 172 | 173 | -------------------------------------------------------------------------------- /lib/dataset/SCENT.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/SCENT.pyc -------------------------------------------------------------------------------- /lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from Defects import Defects 2 | from SCENT import SCENT 3 | from Logo import Logo 4 | # from imdb import IMDB 5 | from pascal_voc import PascalVOC 6 | # from cityscape import CityScape 7 | from coco import coco 8 | from imagenet import ImageNet 9 | -------------------------------------------------------------------------------- /lib/dataset/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/__init__.pyc -------------------------------------------------------------------------------- /lib/dataset/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/coco.pyc -------------------------------------------------------------------------------- /lib/dataset/ds_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def unique_boxes(boxes, scale=1.0): 5 | """ 
return indices of unique boxes """ 6 | v = np.array([1, 1e3, 1e6, 1e9]) 7 | hashes = np.round(boxes * scale).dot(v) 8 | _, index = np.unique(hashes, return_index=True) 9 | return np.sort(index) 10 | 11 | 12 | def filter_small_boxes(boxes, min_size): 13 | w = boxes[:, 2] - boxes[:, 0] 14 | h = boxes[:, 3] - boxes[:, 1] 15 | keep = np.where((w >= min_size) & (h > min_size))[0] 16 | return keep -------------------------------------------------------------------------------- /lib/dataset/ds_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/ds_utils.pyc -------------------------------------------------------------------------------- /lib/dataset/imagenet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/imagenet.pyc -------------------------------------------------------------------------------- /lib/dataset/imagenet_voc_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/imagenet_voc_eval.pyc -------------------------------------------------------------------------------- /lib/dataset/imdb.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/imdb.pyc -------------------------------------------------------------------------------- /lib/dataset/pascal_voc.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pascal_voc.pyc -------------------------------------------------------------------------------- /lib/dataset/pascal_voc_eval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pascal_voc_eval.pyc -------------------------------------------------------------------------------- /lib/dataset/pycocotools/.gitignore: -------------------------------------------------------------------------------- 1 | _mask.c 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/__init__.pyc -------------------------------------------------------------------------------- /lib/dataset/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/_mask.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/dataset/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/_mask.so -------------------------------------------------------------------------------- /lib/dataset/pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/coco.pyc -------------------------------------------------------------------------------- /lib/dataset/pycocotools/cocoeval.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/cocoeval.pyc -------------------------------------------------------------------------------- /lib/dataset/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import _mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | #decode = _mask.decode 78 | def decode(rleObjs): 79 | if type(rleObjs) == list: 80 | return _mask.decode(rleObjs) 81 | else: 82 | return _mask.decode([rleObjs])[:,:,0] 83 | iou = _mask.iou 84 | merge = _mask.merge 85 | area = _mask.area 86 | toBbox = _mask.toBbox 87 | frPyObjects = _mask.frPyObjects 88 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/dataset/pycocotools/mask.pyc -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.c: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #include "maskApi.h" 8 | #include 9 | #include 10 | 11 | uint umin( uint a, uint b ) { return (ab) ? 
a : b; } 13 | 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { 15 | R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); 16 | if(cnts) for(siz j=0; jcnts[j]=cnts[j]; 17 | } 18 | 19 | void rleFree( RLE *R ) { 20 | free(R->cnts); R->cnts=0; 21 | } 22 | 23 | void rlesInit( RLE **R, siz n ) { 24 | *R = (RLE*) malloc(sizeof(RLE)*n); 25 | for(siz i=0; i0 ) { 61 | c=umin(ca,cb); cc+=c; ct=0; 62 | ca-=c; if(!ca && a0) { 83 | crowd=iscrowd!=NULL && iscrowd[g]; 84 | if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } 85 | siz ka, kb, a, b; uint c, ca, cb, ct, i, u; bool va, vb; 86 | ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; 87 | cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; 88 | while( ct>0 ) { 89 | c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; 90 | ca-=c; if(!ca && ad?1:c=dy && xs>xe) || (dxye); 151 | if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } 152 | s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; 153 | if(dx>=dy) for( int d=0; d<=dx; d++ ) { 154 | t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; 155 | } else for( int d=0; d<=dy; d++ ) { 156 | t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; 157 | } 158 | } 159 | // get points along y-boundary and downsample 160 | free(x); free(y); k=m; m=0; double xd, yd; 161 | x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); 162 | for( j=1; jw-1 ) continue; 165 | yd=(double)(v[j]h) yd=h; yd=ceil(yd); 167 | x[m]=(int) xd; y[m]=(int) yd; m++; 168 | } 169 | // compute rle encoding given y-boundary points 170 | k=m; a=malloc(sizeof(uint)*(k+1)); 171 | for( j=0; j0) b[m++]=a[j++]; else { 177 | j++; if(jm, p=0; long x; bool more; 184 | char *s=malloc(sizeof(char)*m*6); 185 | for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; 187 | while( more ) { 188 | char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; 189 | if(more) c |= 0x20; c+=48; s[p++]=c; 190 | } 191 | } 192 | s[p]=0; return s; 193 | } 194 | 195 | void rleFrString( RLE *R, char *s, siz h, siz w ) { 196 | siz m=0, p=0, k; long x; bool more; uint *cnts; 197 | while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; 198 | while( s[p] ) { 199 | x=0; k=0; more=1; 200 | while( more ) { 201 | char c=s[p]-48; x |= (c & 0x1f) << 5*k; 202 | more = c & 0x20; p++; k++; 203 | if(!more && (c & 0x10)) x |= -1 << 5*k; 204 | } 205 | if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; 206 | } 207 | rleInit(R,h,w,m,cnts); free(cnts); 208 | } 209 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 
25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask. 55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_linux.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['maskApi.c', '_mask.pyx'], 13 | include_dirs=[np.get_include()], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | ext_modules=cythonize(ext_modules) 20 | ) 21 | -------------------------------------------------------------------------------- /lib/dataset/pycocotools/setup_windows.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | import distutils.msvc9compiler 7 | distutils.msvc9compiler.VERSION = 14.0 8 | 9 | 10 | # To compile and install locally run "python setup.py build_ext --inplace" 11 | # To install library to Python site-packages run "python setup.py build_ext install" 12 | 13 | ext_modules = [ 14 | Extension( 15 | '_mask', 16 | sources=['maskApi.c', '_mask.pyx'], 17 | include_dirs=[np.get_include()], 18 | extra_compile_args=[], 19 | ) 20 | ] 21 | 22 | setup(name='pycocotools', 23 | ext_modules=cythonize(ext_modules) 24 | ) 25 | -------------------------------------------------------------------------------- /lib/mask/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/mask/__init__.py -------------------------------------------------------------------------------- /lib/mask/__init__.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/mask/__init__.pyc -------------------------------------------------------------------------------- /lib/mask/mask_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Haozhi Qi, Yi Li, Guodong Zhang 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | 11 | 12 | def intersect_box_mask(ex_box, gt_box, gt_mask): 13 | """ 14 | This function calculate the intersection part of a external box 15 | and gt_box, mask it according to gt_mask 16 | Args: 17 | ex_box: external ROIS 18 | gt_box: ground truth boxes 19 | gt_mask: ground truth masks, not been resized yet 20 | Returns: 21 | regression_target: logical numpy array 22 | """ 23 | x1 = max(ex_box[0], gt_box[0]) 24 | y1 = max(ex_box[1], gt_box[1]) 25 | x2 = min(ex_box[2], gt_box[2]) 26 | y2 = min(ex_box[3], gt_box[3]) 27 | if x1 > x2 or y1 > y2: 28 | return np.zeros((21, 21), dtype=bool) 29 | w = x2 - x1 + 1 30 | h = y2 - y1 + 1 31 | ex_starty = y1 - ex_box[1] 32 | ex_startx = x1 - ex_box[0] 33 | 34 | inter_maskb = gt_mask[y1:y2+1 , x1:x2+1] 35 | regression_target = np.zeros((ex_box[3] - ex_box[1] + 1, ex_box[2] - ex_box[0] + 1)) 36 | regression_target[ex_starty: ex_starty + h, ex_startx: ex_startx + w] = inter_maskb 37 | 38 | return regression_target 39 | 40 | 41 | def mask_overlap(box1, box2, mask1, mask2): 42 | """ 43 | This function calculate region IOU when masks are 44 | inside different boxes 45 | Returns: 46 | intersection over unions of this two masks 47 | """ 48 | x1 = max(box1[0], box2[0]) 49 | y1 = max(box1[1], box2[1]) 50 | x2 = min(box1[2], box2[2]) 51 | y2 = min(box1[3], box2[3]) 52 | if x1 > x2 or y1 > y2: 53 | return 0 54 | w = x2 - x1 + 1 55 | h = y2 - y1 + 1 56 | # get masks in the intersection part 57 | start_ya = y1 - box1[1] 58 | start_xa = x1 - box1[0] 59 | inter_maska = mask1[start_ya: start_ya + h, start_xa:start_xa + w] 60 | 61 | start_yb = y1 - box2[1] 62 | start_xb = x1 - box2[0] 63 | inter_maskb = mask2[start_yb: start_yb + h, start_xb:start_xb + w] 64 | 65 | assert inter_maska.shape == inter_maskb.shape 66 | 67 | inter = np.logical_and(inter_maskb, inter_maska).sum() 68 | union = mask1.sum() + mask2.sum() - inter 69 | if union < 1.0: 70 | return 0 71 | return float(inter) / float(union) 72 | -------------------------------------------------------------------------------- /lib/mask/mask_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/mask/mask_transform.pyc -------------------------------------------------------------------------------- /lib/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/__init__.py -------------------------------------------------------------------------------- /lib/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/__init__.pyc 
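A quick sanity check of mask_overlap from lib/mask/mask_transform.py above (a minimal sketch in plain NumPy: the boxes, masks, and expected value are invented for illustration, and lib/mask is assumed to be importable):

import numpy as np
from mask_transform import mask_overlap  # assumes lib/mask is on sys.path

# Two 4x4 all-ones masks in boxes [x1, y1, x2, y2] that overlap in a 2x2 patch.
box1, box2 = [0, 0, 3, 3], [2, 2, 5, 5]
mask1 = np.ones((4, 4), dtype=bool)
mask2 = np.ones((4, 4), dtype=bool)

# Intersection = 4 pixels, union = 16 + 16 - 4 = 28, so IoU = 4/28.
print(mask_overlap(box1, box2, mask1, mask2))  # -> 0.14285714...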
-------------------------------------------------------------------------------- /lib/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /lib/nms/cpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/cpu_nms.so -------------------------------------------------------------------------------- /lib/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn 
(https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int32_t, ndim=1] \ 26 | order = scores.argsort()[::-1].astype(np.int32) 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /lib/nms/gpu_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/gpu_nms.so -------------------------------------------------------------------------------- /lib/nms/gpu_nms_10.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/gpu_nms_10.so -------------------------------------------------------------------------------- /lib/nms/gpu_nms_8.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/gpu_nms_8.so -------------------------------------------------------------------------------- /lib/nms/gpu_nms_9.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/gpu_nms_9.so -------------------------------------------------------------------------------- /lib/nms/nms.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cpu_nms import cpu_nms 4 | from gpu_nms import gpu_nms 5 | 6 | 7 | def py_nms_wrapper(thresh): 8 | def _nms(dets): 9 | return nms(dets, thresh) 10 | return _nms 11 | 12 | 13 | def py_softnms_wrapper(thresh, max_dets=-1): 14 | def _nms(dets): 15 | return soft_nms(dets, thresh, max_dets) 16 | return _nms 17 | 18 | 19 | def cpu_nms_wrapper(thresh): 20 | def _nms(dets): 21 | return cpu_nms(dets, thresh) 22 | return _nms 23 | 24 | 25 | def gpu_nms_wrapper(thresh, device_id): 26 | def _nms(dets): 27 | return gpu_nms(dets, thresh, device_id) 28 | return _nms 29 | 30 | 31 | def nms(dets, thresh): 32 | """ 33 | greedily select boxes with high confidence and overlap with current maximum <= thresh 34 | rule out overlap >= thresh 35 | :param dets: [[x1, y1, x2, y2 score]] 36 | :param thresh: retain overlap < thresh 37 | :return: indexes to keep 38 | """ 39 | if dets.shape[0] == 0: 40 | return [] 41 | 42 | x1 = dets[:, 0] 43 | y1 = dets[:, 1] 44 | x2 = dets[:, 2] 45 | y2 = dets[:, 3] 46 | scores = dets[:, 4] 47 | 48 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 49 | order = scores.argsort()[::-1] 50 | 51 | keep = [] 52 | 
while order.size > 0:
53 |         i = order[0]
54 |         keep.append(i)
55 |         xx1 = np.maximum(x1[i], x1[order[1:]])
56 |         yy1 = np.maximum(y1[i], y1[order[1:]])
57 |         xx2 = np.minimum(x2[i], x2[order[1:]])
58 |         yy2 = np.minimum(y2[i], y2[order[1:]])
59 |
60 |         w = np.maximum(0.0, xx2 - xx1 + 1)
61 |         h = np.maximum(0.0, yy2 - yy1 + 1)
62 |         inter = w * h
63 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
64 |
65 |         inds = np.where(ovr <= thresh)[0]
66 |         order = order[inds + 1]
67 |
68 |     return keep
69 |
70 |
71 | def rescore(overlap, scores, thresh, type='gaussian'):
72 |     assert overlap.shape[0] == scores.shape[0]
73 |     if type == 'linear':
74 |         inds = np.where(overlap >= thresh)[0]
75 |         scores[inds] = scores[inds] * (1 - overlap[inds])
76 |     else:
77 |         scores = scores * np.exp(- overlap**2 / thresh)
78 |
79 |     return scores
80 |
81 |
82 | def soft_nms(dets, thresh, max_dets):
83 |     if dets.shape[0] == 0:
84 |         return np.zeros((0, 5))
85 |
86 |     x1 = dets[:, 0]
87 |     y1 = dets[:, 1]
88 |     x2 = dets[:, 2]
89 |     y2 = dets[:, 3]
90 |     scores = dets[:, 4]
91 |
92 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
93 |     order = scores.argsort()[::-1]
94 |     scores = scores[order]
95 |
96 |     if max_dets == -1:
97 |         max_dets = order.size
98 |
99 |     keep = np.zeros(max_dets, dtype=np.intp)
100 |     keep_cnt = 0
101 |
102 |     while order.size > 0 and keep_cnt < max_dets:
103 |         i = order[0]
104 |         dets[i, 4] = scores[0]
105 |         xx1 = np.maximum(x1[i], x1[order[1:]])
106 |         yy1 = np.maximum(y1[i], y1[order[1:]])
107 |         xx2 = np.minimum(x2[i], x2[order[1:]])
108 |         yy2 = np.minimum(y2[i], y2[order[1:]])
109 |
110 |         w = np.maximum(0.0, xx2 - xx1 + 1)
111 |         h = np.maximum(0.0, yy2 - yy1 + 1)
112 |         inter = w * h
113 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
114 |
115 |         order = order[1:]
116 |         scores = rescore(ovr, scores[1:], thresh)
117 |
118 |         tmp = scores.argsort()[::-1]
119 |         order = order[tmp]
120 |         scores = scores[tmp]
121 |
122 |         keep[keep_cnt] = i
123 |         keep_cnt += 1
124 |
125 |     keep = keep[:keep_cnt]
126 |     dets = dets[keep, :]
127 |     return dets
128 |
--------------------------------------------------------------------------------
/lib/nms/nms.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/nms/nms.pyc
--------------------------------------------------------------------------------
/lib/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Deformable Convolutional Networks
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License
5 | // Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 |   /* Code block avoids redefinition of cudaError_t error */ \
14 |   do { \
15 |     cudaError_t error = condition; \
16 |     if (error != cudaSuccess) { \
17 |       std::cout << cudaGetErrorString(error) << std::endl; \
18 |     } \
19 |   } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 |   float width
= max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(¤t_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 
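// (Context for the function that follows: _nms allocates device buffers with
// cudaMalloc, copies boxes with cudaMemcpy, and launches nms_kernel; all of
// those bind to the currently selected device, hence this switch comes first.)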
88 |   CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
93 |   _set_device(device_id);
94 |
95 |   float* boxes_dev = NULL;
96 |   unsigned long long* mask_dev = NULL;
97 |
98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 |
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 |
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 |
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
/lib/nms/setup_linux.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Deformable Convolutional Networks
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | from setuptools import setup
11 | from distutils.extension import Extension
12 | from Cython.Distutils import build_ext
13 | import numpy as np
14 |
15 |
16 | def find_in_path(name, path):
17 |     "Find a file in a search path"
18 |     # Adapted from
19 |     # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
20 |     for dir in path.split(os.pathsep):
21 |         binpath = pjoin(dir, name)
22 |         if os.path.exists(binpath):
23 |             return os.path.abspath(binpath)
24 |     return None
25 |
26 |
27 | def locate_cuda():
28 |     """Locate the CUDA environment on the system
29 |     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
30 |     and values giving the absolute path to each directory.
31 |     Starts by looking for the CUDAHOME env variable. If not found, everything
32 |     is based on finding 'nvcc' in the PATH.
33 | """ 34 | 35 | # first check if the CUDAHOME env variable is in use 36 | if 'CUDAHOME' in os.environ: 37 | home = os.environ['CUDAHOME'] 38 | nvcc = pjoin(home, 'bin', 'nvcc') 39 | else: 40 | # otherwise, search the PATH for NVCC 41 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 42 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 43 | if nvcc is None: 44 | raise EnvironmentError('The nvcc binary could not be ' 45 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 46 | home = os.path.dirname(os.path.dirname(nvcc)) 47 | 48 | cudaconfig = {'home':home, 'nvcc':nvcc, 49 | 'include': pjoin(home, 'include'), 50 | 'lib64': pjoin(home, 'lib64')} 51 | for k, v in cudaconfig.iteritems(): 52 | if not os.path.exists(v): 53 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 54 | 55 | return cudaconfig 56 | CUDA = locate_cuda() 57 | 58 | 59 | # Obtain the numpy include directory. This logic works across numpy versions. 60 | try: 61 | numpy_include = np.get_include() 62 | except AttributeError: 63 | numpy_include = np.get_numpy_include() 64 | 65 | 66 | def customize_compiler_for_nvcc(self): 67 | """inject deep into distutils to customize how the dispatch 68 | to gcc/nvcc works. 69 | If you subclass UnixCCompiler, it's not trivial to get your subclass 70 | injected in, and still have the right customizations (i.e. 71 | distutils.sysconfig.customize_compiler) run on it. So instead of going 72 | the OO route, I have this. Note, it's kindof like a wierd functional 73 | subclassing going on.""" 74 | 75 | # tell the compiler it can processes .cu 76 | self.src_extensions.append('.cu') 77 | 78 | # save references to the default compiler_so and _comple methods 79 | default_compiler_so = self.compiler_so 80 | super = self._compile 81 | 82 | # now redefine the _compile method. This gets executed for each 83 | # object but distutils doesn't have the ability to change compilers 84 | # based on source extension: we add it. 
85 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 86 | if os.path.splitext(src)[1] == '.cu': 87 | # use the cuda for .cu files 88 | self.set_executable('compiler_so', CUDA['nvcc']) 89 | # use only a subset of the extra_postargs, which are 1-1 translated 90 | # from the extra_compile_args in the Extension class 91 | postargs = extra_postargs['nvcc'] 92 | else: 93 | postargs = extra_postargs['gcc'] 94 | 95 | super(obj, src, ext, cc_args, postargs, pp_opts) 96 | # reset the default compiler_so, which we might have changed for cuda 97 | self.compiler_so = default_compiler_so 98 | 99 | # inject our redefined _compile method into the class 100 | self._compile = _compile 101 | 102 | 103 | # run the customize_compiler 104 | class custom_build_ext(build_ext): 105 | def build_extensions(self): 106 | customize_compiler_for_nvcc(self.compiler) 107 | build_ext.build_extensions(self) 108 | 109 | 110 | ext_modules = [ 111 | Extension( 112 | "cpu_nms", 113 | ["cpu_nms.pyx"], 114 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 115 | include_dirs = [numpy_include] 116 | ), 117 | Extension('gpu_nms', 118 | ['nms_kernel.cu', 'gpu_nms.pyx'], 119 | library_dirs=[CUDA['lib64']], 120 | libraries=['cudart'], 121 | language='c++', 122 | runtime_library_dirs=[CUDA['lib64']], 123 | # this syntax is specific to this build system 124 | # we're only going to use certain compiler args with nvcc and not with 125 | # gcc the implementation of this trick is in customize_compiler() below 126 | extra_compile_args={'gcc': ["-Wno-unused-function"], 127 | 'nvcc': ['-arch=sm_35', 128 | '--ptxas-options=-v', 129 | '-c', 130 | '--compiler-options', 131 | "'-fPIC'"]}, 132 | include_dirs = [numpy_include, CUDA['include']] 133 | ), 134 | ] 135 | 136 | setup( 137 | name='nms', 138 | ext_modules=ext_modules, 139 | # inject our custom trigger 140 | cmdclass={'build_ext': custom_build_ext}, 141 | ) 142 | -------------------------------------------------------------------------------- /lib/nms/setup_windows.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import os 10 | from os.path import join as pjoin 11 | #from distutils.core import setup 12 | from setuptools import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | import subprocess 16 | 17 | #change for windows, by MrX 18 | nvcc_bin = 'nvcc.exe' 19 | lib_dir = 'lib/x64' 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | 25 | def find_in_path(name, path): 26 | "Find a file in a search path" 27 | # Adapted fom 28 | # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 29 | for dir in path.split(os.pathsep): 30 | binpath = pjoin(dir, name) 31 | if os.path.exists(binpath): 32 | return os.path.abspath(binpath) 33 | return None 34 | 35 | 36 | def locate_cuda(): 37 | """Locate the CUDA environment on the system 38 | 39 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 40 | and values giving the absolute path to each directory. 41 | 42 | Starts by looking for the CUDAHOME env variable. 
If not found, everything 43 | is based on finding 'nvcc' in the PATH. 44 | """ 45 | 46 | # first check if the CUDAHOME env variable is in use 47 | if 'CUDA_PATH' in os.environ: 48 | home = os.environ['CUDA_PATH'] 49 | print("home = %s\n" % home) 50 | nvcc = pjoin(home, 'bin', nvcc_bin) 51 | else: 52 | # otherwise, search the PATH for NVCC 53 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 54 | nvcc = find_in_path(nvcc_bin, os.environ['PATH'] + os.pathsep + default_path) 55 | if nvcc is None: 56 | raise EnvironmentError('The nvcc binary could not be ' 57 | 'located in your $PATH. Either add it to your path, or set $CUDA_PATH') 58 | home = os.path.dirname(os.path.dirname(nvcc)) 59 | print("home = %s, nvcc = %s\n" % (home, nvcc)) 60 | 61 | 62 | cudaconfig = {'home':home, 'nvcc':nvcc, 63 | 'include': pjoin(home, 'include'), 64 | 'lib64': pjoin(home, lib_dir)} 65 | for k, v in cudaconfig.iteritems(): 66 | if not os.path.exists(v): 67 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 68 | 69 | return cudaconfig 70 | CUDA = locate_cuda() 71 | 72 | 73 | # Obtain the numpy include directory. This logic works across numpy versions. 74 | try: 75 | numpy_include = np.get_include() 76 | except AttributeError: 77 | numpy_include = np.get_numpy_include() 78 | 79 | 80 | def customize_compiler_for_nvcc(self): 81 | """inject deep into distutils to customize how the dispatch 82 | to gcc/nvcc works. 83 | 84 | If you subclass UnixCCompiler, it's not trivial to get your subclass 85 | injected in, and still have the right customizations (i.e. 86 | distutils.sysconfig.customize_compiler) run on it. So instead of going 87 | the OO route, I have this. Note, it's kindof like a wierd functional 88 | subclassing going on.""" 89 | 90 | # tell the compiler it can processes .cu 91 | #self.src_extensions.append('.cu') 92 | 93 | 94 | # save references to the default compiler_so and _comple methods 95 | #default_compiler_so = self.spawn 96 | #default_compiler_so = self.rc 97 | super = self.compile 98 | 99 | # now redefine the _compile method. This gets executed for each 100 | # object but distutils doesn't have the ability to change compilers 101 | # based on source extension: we add it. 
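# (Unlike the per-object _compile hook patched in setup_linux.py, MSVC's
# compiler object exposes a batch-level compile(), so that method is wrapped
# below instead; it only picks which extra_postargs entry to forward, 'nvcc'
# vs 'gcc', based on the extension of the first source file.)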
102 | def compile(sources, output_dir=None, macros=None, include_dirs=None, debug=0, extra_preargs=None, extra_postargs=None, depends=None): 103 | postfix=os.path.splitext(sources[0])[1] 104 | 105 | if postfix == '.cu': 106 | # use the cuda for .cu files 107 | #self.set_executable('compiler_so', CUDA['nvcc']) 108 | # use only a subset of the extra_postargs, which are 1-1 translated 109 | # from the extra_compile_args in the Extension class 110 | postargs = extra_postargs['nvcc'] 111 | else: 112 | postargs = extra_postargs['gcc'] 113 | 114 | 115 | return super(sources, output_dir, macros, include_dirs, debug, extra_preargs, postargs, depends) 116 | # reset the default compiler_so, which we might have changed for cuda 117 | #self.rc = default_compiler_so 118 | 119 | # inject our redefined _compile method into the class 120 | self.compile = compile 121 | 122 | 123 | # run the customize_compiler 124 | class custom_build_ext(build_ext): 125 | def build_extensions(self): 126 | customize_compiler_for_nvcc(self.compiler) 127 | build_ext.build_extensions(self) 128 | 129 | 130 | ext_modules = [ 131 | # unix _compile: obj, src, ext, cc_args, extra_postargs, pp_opts 132 | Extension( 133 | "cpu_nms", 134 | sources=["cpu_nms.pyx"], 135 | extra_compile_args={'gcc': []}, 136 | include_dirs = [numpy_include], 137 | ), 138 | ] 139 | 140 | setup( 141 | name='fast_rcnn', 142 | ext_modules=ext_modules, 143 | # inject our custom trigger 144 | cmdclass={'build_ext': custom_build_ext}, 145 | ) 146 | -------------------------------------------------------------------------------- /lib/nms/setup_windows_cuda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import numpy as np 4 | import os 5 | # on Windows, we need the original PATH without Anaconda's compiler in it: 6 | PATH = os.environ.get('PATH') + ';C:\\Program Files (x86)\\Microsoft Visual Studio 14.0\\VC\\bin' 7 | from distutils.spawn import spawn, find_executable 8 | from setuptools import setup, find_packages, Extension 9 | from setuptools.command.build_ext import build_ext 10 | import sys 11 | 12 | # CUDA specific config 13 | # nvcc is assumed to be in user's PATH 14 | nvcc_compile_args = ['-O', '--ptxas-options=-v', '-arch=compute_35', '-code=sm_35,sm_52,sm_61', '-c', '--compiler-options=-fPIC'] 15 | nvcc_compile_args = os.environ.get('NVCCFLAGS', '').split() + nvcc_compile_args 16 | cuda_libs = ['cublas'] 17 | nvcc_bin = 'nvcc.exe' 18 | lib_dir = 'lib/x64' 19 | 20 | 21 | import distutils.msvc9compiler 22 | distutils.msvc9compiler.VERSION = 14.0 23 | 24 | # Obtain the numpy include directory. This logic works across numpy versions. 25 | try: 26 | numpy_include = np.get_include() 27 | except AttributeError: 28 | numpy_include = np.get_numpy_include() 29 | 30 | 31 | cudamat_ext = Extension('gpu_nms', 32 | sources=[ 33 | 'gpu_nms.cu' 34 | ], 35 | language='c++', 36 | libraries=cuda_libs, 37 | extra_compile_args=nvcc_compile_args, 38 | include_dirs = [numpy_include, 'C:\\Programming\\CUDA\\v8.0\\include']) 39 | 40 | 41 | class CUDA_build_ext(build_ext): 42 | """ 43 | Custom build_ext command that compiles CUDA files. 44 | Note that all extension source files will be processed with this compiler. 
45 | """ 46 | def build_extensions(self): 47 | self.compiler.src_extensions.append('.cu') 48 | self.compiler.set_executable('compiler_so', 'nvcc') 49 | self.compiler.set_executable('linker_so', 'nvcc --shared') 50 | if hasattr(self.compiler, '_c_extensions'): 51 | self.compiler._c_extensions.append('.cu') # needed for Windows 52 | self.compiler.spawn = self.spawn 53 | build_ext.build_extensions(self) 54 | 55 | def spawn(self, cmd, search_path=1, verbose=0, dry_run=0): 56 | """ 57 | Perform any CUDA specific customizations before actually launching 58 | compile/link etc. commands. 59 | """ 60 | if (sys.platform == 'darwin' and len(cmd) >= 2 and cmd[0] == 'nvcc' and 61 | cmd[1] == '--shared' and cmd.count('-arch') > 0): 62 | # Versions of distutils on OSX earlier than 2.7.9 inject 63 | # '-arch x86_64' which we need to strip while using nvcc for 64 | # linking 65 | while True: 66 | try: 67 | index = cmd.index('-arch') 68 | del cmd[index:index+2] 69 | except ValueError: 70 | break 71 | elif self.compiler.compiler_type == 'msvc': 72 | # There are several things we need to do to change the commands 73 | # issued by MSVCCompiler into one that works with nvcc. In the end, 74 | # it might have been easier to write our own CCompiler class for 75 | # nvcc, as we're only interested in creating a shared library to 76 | # load with ctypes, not in creating an importable Python extension. 77 | # - First, we replace the cl.exe or link.exe call with an nvcc 78 | # call. In case we're running Anaconda, we search cl.exe in the 79 | # original search path we captured further above -- Anaconda 80 | # inserts a MSVC version into PATH that is too old for nvcc. 81 | cmd[:1] = ['nvcc', '--compiler-bindir', 82 | os.path.dirname(find_executable("cl.exe", PATH)) 83 | or cmd[0]] 84 | # - Secondly, we fix a bunch of command line arguments. 85 | for idx, c in enumerate(cmd): 86 | # create .dll instead of .pyd files 87 | #if '.pyd' in c: cmd[idx] = c = c.replace('.pyd', '.dll') #20160601, by MrX 88 | # replace /c by -c 89 | if c == '/c': cmd[idx] = '-c' 90 | # replace /DLL by --shared 91 | elif c == '/DLL': cmd[idx] = '--shared' 92 | # remove --compiler-options=-fPIC 93 | elif '-fPIC' in c: del cmd[idx] 94 | # replace /Tc... by ... 95 | elif c.startswith('/Tc'): cmd[idx] = c[3:] 96 | # replace /Fo... by -o ... 97 | elif c.startswith('/Fo'): cmd[idx:idx+1] = ['-o', c[3:]] 98 | # replace /LIBPATH:... by -L... 99 | elif c.startswith('/LIBPATH:'): cmd[idx] = '-L' + c[9:] 100 | # replace /OUT:... by -o ... 
101 | elif c.startswith('/OUT:'): cmd[idx:idx+1] = ['-o', c[5:]] 102 | # remove /EXPORT:initlibcudamat or /EXPORT:initlibcudalearn 103 | elif c.startswith('/EXPORT:'): del cmd[idx] 104 | # replace cublas.lib by -lcublas 105 | elif c == 'cublas.lib': cmd[idx] = '-lcublas' 106 | # - Finally, we pass on all arguments starting with a '/' to the 107 | # compiler or linker, and have nvcc handle all other arguments 108 | if '--shared' in cmd: 109 | pass_on = '--linker-options=' 110 | # we only need MSVCRT for a .dll, remove CMT if it sneaks in: 111 | cmd.append('/NODEFAULTLIB:libcmt.lib') 112 | else: 113 | pass_on = '--compiler-options=' 114 | cmd = ([c for c in cmd if c[0] != '/'] + 115 | [pass_on + ','.join(c for c in cmd if c[0] == '/')]) 116 | # For the future: Apart from the wrongly set PATH by Anaconda, it 117 | # would suffice to run the following for compilation on Windows: 118 | # nvcc -c -O -o .obj .cu 119 | # And the following for linking: 120 | # nvcc --shared -o .dll .obj .obj -lcublas 121 | # This could be done by a NVCCCompiler class for all platforms. 122 | spawn(cmd, search_path, verbose, dry_run) 123 | 124 | setup(name="py_fast_rcnn_gpu", 125 | description="Performs linear algebra computation on the GPU via CUDA", 126 | ext_modules=[cudamat_ext], 127 | cmdclass={'build_ext': CUDA_build_ext}, 128 | ) 129 | -------------------------------------------------------------------------------- /lib/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/rpn/__init__.py -------------------------------------------------------------------------------- /lib/rpn/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/rpn/__init__.pyc -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate base anchors on index 0 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 9 | scales=2 ** np.arange(3, 6)): 10 | """ 11 | Generate anchor (reference) windows by enumerating aspect ratios X 12 | scales wrt a reference (0, 0, 15, 15) window. 13 | """ 14 | 15 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 16 | ratio_anchors = _ratio_enum(base_anchor, ratios) 17 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 18 | for i in xrange(ratio_anchors.shape[0])]) 19 | return anchors 20 | 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | 34 | def _mkanchors(ws, hs, x_ctr, y_ctr): 35 | """ 36 | Given a vector of widths (ws) and heights (hs) around a center 37 | (x_ctr, y_ctr), output a set of anchors (windows). 38 | """ 39 | 40 | ws = ws[:, np.newaxis] 41 | hs = hs[:, np.newaxis] 42 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 43 | y_ctr - 0.5 * (hs - 1), 44 | x_ctr + 0.5 * (ws - 1), 45 | y_ctr + 0.5 * (hs - 1))) 46 | return anchors 47 | 48 | 49 | def _ratio_enum(anchor, ratios): 50 | """ 51 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
52 | """ 53 | 54 | w, h, x_ctr, y_ctr = _whctrs(anchor) 55 | size = w * h 56 | size_ratios = size / ratios 57 | ws = np.round(np.sqrt(size_ratios)) 58 | hs = np.round(ws * ratios) 59 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 60 | return anchors 61 | 62 | 63 | def _scale_enum(anchor, scales): 64 | """ 65 | Enumerate a set of anchors for each scale wrt an anchor. 66 | """ 67 | 68 | w, h, x_ctr, y_ctr = _whctrs(anchor) 69 | ws = w * scales 70 | hs = h * scales 71 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 72 | return anchors 73 | -------------------------------------------------------------------------------- /lib/rpn/generate_anchor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/rpn/generate_anchor.pyc -------------------------------------------------------------------------------- /lib/rpn/rpn.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/rpn/rpn.pyc -------------------------------------------------------------------------------- /lib/segmentation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/segmentation/__init__.py -------------------------------------------------------------------------------- /lib/segmentation/segmentation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Segmentation: 3 | data = 4 | {'data': [num_images, c, h, w], 5 | 'im_info': [num_images, 4] (optional)} 6 | label = 7 | {'label': [batch_size, 1] <- [batch_size, c, h, w]} 8 | """ 9 | 10 | import numpy as np 11 | from utils.image import get_segmentation_image, tensor_vstack 12 | 13 | def get_segmentation_test_batch(segdb, config): 14 | """ 15 | return a dict of train batch 16 | :param segdb: ['image', 'flipped'] 17 | :param config: the config setting 18 | :return: data, label, im_info 19 | """ 20 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 21 | im_array = imgs 22 | im_info = [np.array([segdb[i]['im_info']], dtype=np.float32) for i in xrange(len(segdb))] 23 | 24 | data = [{'data': im_array[i], 25 | 'im_info': im_info[i]} for i in xrange(len(segdb))] 26 | label = [{'label':seg_cls_gts[i]} for i in xrange(len(segdb))] 27 | 28 | return data, label, im_info 29 | 30 | def get_segmentation_train_batch(segdb, config): 31 | """ 32 | return a dict of train batch 33 | :param segdb: ['image', 'flipped'] 34 | :param config: the config setting 35 | :return: data, label, im_info 36 | """ 37 | # assert len(segdb) == 1, 'Single batch only' 38 | assert len(segdb) == 1, 'Single batch only' 39 | 40 | imgs, seg_cls_gts, segdb = get_segmentation_image(segdb, config) 41 | im_array = imgs[0] 42 | seg_cls_gt = seg_cls_gts[0] 43 | 44 | im_info = np.array([segdb[0]['im_info']], dtype=np.float32) 45 | 46 | data = {'data': im_array, 47 | 'im_info': im_info} 48 | label = {'label': seg_cls_gt} 49 | 50 | return data, label 51 | 52 | -------------------------------------------------------------------------------- /lib/utils/JES3D_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import inv 3 | from utils.JES3D_transform_utils import file_lines_to_list 4 | import copy 5 | def 
read_cam(str): 6 | cam = {} 7 | wrds=str.split(' ') 8 | if not len(wrds)==14: 9 | return cam 10 | vals = [float(w) for w in wrds] 11 | cam['id'] = vals[0]+1 12 | cam['f'] = vals[1] 13 | cam['R'] = np.reshape(vals[2:11],(3,3)) 14 | cam['ori'] = np.transpose(cam['R']) 15 | cam['T'] = np.transpose(np.reshape(vals[11:14],(1,3))) 16 | cam['pos'] = np.matmul(-cam['ori'],cam['T']) 17 | cam['z']=np.reshape(cam['ori'][:,2],(3,1)) 18 | return cam 19 | 20 | def load_calib_file(calib_file): 21 | views = ['Left', 'Right', 'Top'] 22 | file_lines = file_lines_to_list(calib_file) 23 | ncam = int(file_lines[0]) 24 | if not ncam==3: 25 | print('Error: Invalid calibration file - number of cameras must be 3') 26 | 27 | cams = [] 28 | pts = [] 29 | for i in range(ncam): 30 | cam = read_cam(file_lines[i+1]) 31 | if len(cam) == 0: 32 | print('Can not read camera from line {}'.format(i+1)) 33 | return cams, pts 34 | cam['view'] = views[i] 35 | cams.append(cam) 36 | npts = int(file_lines[4]) 37 | pts = np.zeros((npts,3)) 38 | for i in range(npts): 39 | wrds = file_lines[i+5].split(' ') 40 | p = [float(x) for x in wrds] 41 | if not len(p) == 3: 42 | print('Can not parse point at line {}'.format(i+5)) 43 | return cams, pts 44 | pts[i,:] = p 45 | return cams, pts 46 | 47 | class JES3D_transform: 48 | def __init__(self, calib_file): 49 | self.cams, self.pts = load_calib_file(calib_file) 50 | self. imgsK = [] 51 | self.W = 1280 52 | self.H=960 53 | self.estimated_hight = 0.05 54 | for i in range(3): 55 | self.imgsK.append(np.reshape([self.cams[i]['f'], 0 , (self.W-1)/2, 0, self.cams[i]['f'], (self.H-1)/2, 0, 0, 1],(3,3))) 56 | 57 | def rotate90(self,pt, direction=1): 58 | # rotating a point's cordinates by 90 degree (direction = +-1 ) 59 | rot_pt = pt.copy() 60 | if direction>0: 61 | rot_pt[0] = self.W-pt[1] 62 | rot_pt[1] = pt[0] 63 | else: 64 | rot_pt[0] = pt[1] 65 | rot_pt[1] = self.H - pt[0] 66 | return rot_pt 67 | 68 | def point_dist_from_cam_for_z(self,R, T, p_n, z): 69 | # given a camera with R, T(pc=R * pw + T) and normalized point in the camera p_n, find 70 | # distance from the camera such that pw_z = z. 
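# Derivation of the formula computed below: a point at distance p_dist along
# the normalized ray p_n has world coordinates pw = R^T * (p_n * p_dist - T),
# so pw_z = (R^T p_n)_z * p_dist - (R^T T)_z. Setting pw_z = z and solving
# gives p_dist = (z + (R^T T)_z) / (R^T p_n)_z, i.e. (z + a[2]) / b[2] with
# a and b as in the next two lines.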
71 | a = np.matmul(np.transpose(R),T) 72 | b = np.matmul(np.transpose(R),p_n) 73 | p_dist = (z + a[2]) / b[2] 74 | return p_dist 75 | 76 | #transfering point from src_view to top view (src_view: 0-left, 1-right) 77 | def trans_pt(self, p_src_i, src_view): 78 | #rotation 79 | if src_view==0: #left 80 | p_src_i = self.rotate90(p_src_i,1) 81 | if src_view == 1: # right 82 | p_src_i = self.rotate90(p_src_i,-1) 83 | 84 | p_src_i.append(1) 85 | p_src_n = np.matmul(inv(self.imgsK[src_view]),np.reshape(p_src_i,(3,1))) 86 | p_dist_c = self.point_dist_from_cam_for_z(self.cams[src_view]['R'], self.cams[src_view]['T'], p_src_n, self.estimated_hight) 87 | p_src_c = p_src_n * p_dist_c 88 | p_w = np.matmul(np.transpose(self.cams[src_view]['R']),(p_src_c-self.cams[src_view]['T'])) 89 | p_dst_c = np.matmul(self.cams[2]['R'], p_w) + self.cams[2]['T'] 90 | p_dst_n = p_dst_c / p_dst_c[2] 91 | p_dst = np.matmul(self.imgsK[2],p_dst_n) 92 | p_dst = np.transpose(p_dst[0: 2])+1 93 | 94 | return p_dst 95 | 96 | #transfering point from src_view to top view (src_view/trg_view: 0-left, 1-right,2-top) 97 | def trans_rot(self, p_src, src_view,trg_view): 98 | #rotation 99 | # if src_view==0: #left 100 | # p_src_i = self.rotate90(p_src_i,1) 101 | # if src_view == 1: # right 102 | # p_src_i = self.rotate90(p_src_i,-1) 103 | p_src_i = copy.deepcopy(p_src) 104 | p_src_i.append(1) 105 | p_src_n = np.matmul(inv(self.imgsK[src_view]),np.reshape(p_src_i,(3,1))) 106 | p_dist_c = self.point_dist_from_cam_for_z(self.cams[src_view]['R'], self.cams[src_view]['T'], p_src_n, self.estimated_hight) 107 | p_src_c = p_src_n * p_dist_c 108 | p_w = np.matmul(np.transpose(self.cams[src_view]['R']),(p_src_c-self.cams[src_view]['T'])) 109 | p_dst_c = np.matmul(self.cams[trg_view]['R'], p_w) + self.cams[trg_view]['T'] 110 | p_dst_n = p_dst_c / p_dst_c[2] 111 | p_dst = np.matmul(self.imgsK[trg_view],p_dst_n) 112 | p_dst = np.transpose(p_dst[0: 2])+1 113 | # rotate 180 since the original rotation was clockwise, JS rotation counterclockwise 114 | p_dst = p_dst[0] 115 | if trg_view==0: 116 | p_dst = [self.W -p_dst[0]-1, self.H - p_dst[1]-1] 117 | return p_dst 118 | 119 | 120 | 121 | 122 | # transforming prediction from src_view to top view (src_view: 0-left, 1-right) 123 | def trans_pred(self, pred, src_view): 124 | lt = [pred.left, pred.top] 125 | new_lt = self.trans_pt(lt,src_view) 126 | rb = [pred.right, pred.bottom] 127 | new_rb = self.trans_pt(rb, src_view) 128 | left = int(min(new_lt[0,0],new_rb[0,0])) 129 | right = int(max(new_lt[0,0], new_rb[0,0])) 130 | top = int(min(new_lt[0,1], new_rb[0,1])) 131 | bottom = int(max(new_lt[0,1], new_rb[0,1])) 132 | pred.left = max(left,0) 133 | pred.top = max(top,0) 134 | pred.right = min(right,self.W) 135 | pred.bottom = min(bottom,self.H) 136 | return pred 137 | 138 | # transforming prediction from rotated src_view to top view (src_view: 0-left, 1-right) 139 | def trans_pred_rot(self, pred, src_view): 140 | lt = [pred.left, pred.top] 141 | new_lt = self.trans_pt(lt,src_view) 142 | rb = [pred.right, pred.bottom] 143 | new_rb = self.trans_pt(rb, src_view) 144 | left = int(min(new_lt[0,0],new_rb[0,0])) 145 | right = int(max(new_lt[0,0], new_rb[0,0])) 146 | top = int(min(new_lt[0,1], new_rb[0,1])) 147 | bottom = int(max(new_lt[0,1], new_rb[0,1])) 148 | 149 | pred.left = max(left,0) 150 | pred.top = max(top,0) 151 | pred.right = min(right,self.W) 152 | pred.bottom = min(bottom,self.H) 153 | return pred 154 | 155 | -------------------------------------------------------------------------------- 
/lib/utils/JES3D_transform_utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | 3 | # open txt file lines to a list 4 | def file_lines_to_list(path): 5 | with open(path) as f: 6 | content = f.readlines() 7 | content = [x.strip() for x in content] # remove whitespace characters like `\n` at the end of each line 8 | return content 9 | 10 | #prediciton class 11 | class Pred: 12 | def __init__(self, id,conf,left,top,right,bottom): 13 | self.id = id 14 | self.conf = conf 15 | self.left = left 16 | self.top = top 17 | self.right = right 18 | self.bottom = bottom 19 | 20 | # calculate intersections with another prediction 21 | def calc_pred_intersection(self, pred): 22 | if not self.id == pred.id: 23 | return 0 24 | left = max(self.left, pred.left) 25 | right = min(self.right, pred.right) 26 | if left > right: 27 | return 0 28 | top = max(self.top, pred.top) 29 | bottom = min(self.bottom, pred.bottom) 30 | if top > bottom: 31 | return 0 32 | return (right - left) * (bottom - top) 33 | 34 | def plot_preds(img, preds, clr=(255, 0, 255)): 35 | if isinstance(img, str): 36 | img = cv2.imread(img) 37 | for pred in preds: 38 | cv2.rectangle(img, (pred.left, pred.top), (pred.right, pred.bottom), clr) 39 | cv2.putText(img, pred.id, (pred.left, pred.top), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA) 40 | return img 41 | 42 | #read predictions 43 | def read_predictions(txt_file): 44 | lines_list = file_lines_to_list(txt_file) 45 | prediction = [] 46 | for line in lines_list: 47 | id, conf, left, top, right, bottom = line.split(';') 48 | pred = Pred(id,float(conf),int(left),int(top),int(right),int(bottom)) 49 | prediction.append(pred) 50 | return prediction 51 | 52 | -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2016 by Contributors 4 | # Copyright (c) 2017 Microsoft 5 | # Copyright (c) 2019 IBM Corp 6 | # Licensed under The Apache-2.0 License [see LICENSE for details] 7 | # Modified by Yuwen Xiong 8 | # -------------------------------------------------------- 9 | 10 | 11 | import mxnet as mx 12 | from mxnet.io import DataDesc, DataBatch 13 | import threading 14 | 15 | 16 | class PrefetchingIter(mx.io.DataIter): 17 | """Base class for prefetching iterators. Takes one or more DataIters ( 18 | or any class with "reset" and "next" methods) and combine them with 19 | prefetching. For example: 20 | 21 | Parameters 22 | ---------- 23 | iters : DataIter or list of DataIter 24 | one or more DataIters (or any class with "reset" and "next" methods) 25 | rename_data : None or list of dict 26 | i-th element is a renaming map for i-th iter, in the form of 27 | {'original_name' : 'new_name'}. 
Should have one entry for each entry 28 | in iter[i].provide_data 29 | rename_label : None or list of dict 30 | Similar to rename_data 31 | 32 | Examples 33 | -------- 34 | iter = PrefetchingIter([NDArrayIter({'data': X1}), NDArrayIter({'data': X2})], 35 | rename_data=[{'data': 'data1'}, {'data': 'data2'}]) 36 | """ 37 | def __init__(self, iters, rename_data=None, rename_label=None): 38 | super(PrefetchingIter, self).__init__() 39 | if not isinstance(iters, list): 40 | iters = [iters] 41 | self.n_iter = len(iters) 42 | assert self.n_iter ==1, "Our prefetching iter only support 1 DataIter" 43 | self.iters = iters 44 | self.rename_data = rename_data 45 | self.rename_label = rename_label 46 | self.batch_size = len(self.provide_data) * self.provide_data[0][0][1][0] 47 | self.data_ready = [threading.Event() for i in range(self.n_iter)] 48 | self.data_taken = [threading.Event() for i in range(self.n_iter)] 49 | for e in self.data_taken: 50 | e.set() 51 | self.started = True 52 | self.current_batch = [None for _ in range(self.n_iter)] 53 | self.next_batch = [None for _ in range(self.n_iter)] 54 | def prefetch_func(self, i): 55 | """Thread entry""" 56 | while True: 57 | self.data_taken[i].wait() 58 | if not self.started: 59 | break 60 | try: 61 | self.next_batch[i] = self.iters[i].next() 62 | except StopIteration: 63 | self.next_batch[i] = None 64 | self.data_taken[i].clear() 65 | self.data_ready[i].set() 66 | self.prefetch_threads = [threading.Thread(target=prefetch_func, args=[self, i]) \ 67 | for i in range(self.n_iter)] 68 | for thread in self.prefetch_threads: 69 | thread.setDaemon(True) 70 | thread.start() 71 | 72 | def __del__(self): 73 | self.started = False 74 | for e in self.data_taken: 75 | e.set() 76 | for thread in self.prefetch_threads: 77 | thread.join() 78 | 79 | @property 80 | def provide_data(self): 81 | """The name and shape of data provided by this iterator""" 82 | if self.rename_data is None: 83 | return sum([i.provide_data for i in self.iters], []) 84 | else: 85 | return sum([[ 86 | DataDesc(r[x.name], x.shape, x.dtype) 87 | if isinstance(x, DataDesc) else DataDesc(*x) 88 | for x in i.provide_data 89 | ] for r, i in zip(self.rename_data, self.iters)], []) 90 | 91 | @property 92 | def provide_label(self): 93 | """The name and shape of label provided by this iterator""" 94 | if self.rename_label is None: 95 | return sum([i.provide_label for i in self.iters], []) 96 | else: 97 | return sum([[ 98 | DataDesc(r[x.name], x.shape, x.dtype) 99 | if isinstance(x, DataDesc) else DataDesc(*x) 100 | for x in i.provide_label 101 | ] for r, i in zip(self.rename_label, self.iters)], []) 102 | 103 | def reset(self): 104 | for e in self.data_ready: 105 | e.wait() 106 | for i in self.iters: 107 | i.reset() 108 | for e in self.data_ready: 109 | e.clear() 110 | for e in self.data_taken: 111 | e.set() 112 | 113 | def iter_next(self): 114 | for e in self.data_ready: 115 | e.wait() 116 | if self.next_batch[0] is None: 117 | return False 118 | else: 119 | self.current_batch = self.next_batch[0] 120 | for e in self.data_ready: 121 | e.clear() 122 | for e in self.data_taken: 123 | e.set() 124 | return True 125 | 126 | def next(self): 127 | if self.iter_next(): 128 | return self.current_batch 129 | else: 130 | raise StopIteration 131 | 132 | def getdata(self): 133 | return self.current_batch.data 134 | 135 | def getlabel(self): 136 | return self.current_batch.label 137 | 138 | def getindex(self): 139 | return self.current_batch.index 140 | 141 | def getpad(self): 142 | return self.current_batch.pad 143 
| -------------------------------------------------------------------------------- /lib/utils/PrefetchingIter.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/PrefetchingIter.pyc -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | import PrefetchingIter -------------------------------------------------------------------------------- /lib/utils/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/__init__.pyc -------------------------------------------------------------------------------- /lib/utils/combine_model.py: -------------------------------------------------------------------------------- 1 | from load_model import load_checkpoint 2 | from save_model import save_checkpoint 3 | 4 | 5 | def combine_model(prefix1, epoch1, prefix2, epoch2, prefix_out, epoch_out): 6 | args1, auxs1 = load_checkpoint(prefix1, epoch1) 7 | args2, auxs2 = load_checkpoint(prefix2, epoch2) 8 | arg_names = args1.keys() + args2.keys() 9 | aux_names = auxs1.keys() + auxs2.keys() 10 | args = dict() 11 | for arg in arg_names: 12 | if arg in args1: 13 | args[arg] = args1[arg] 14 | if arg in args2: 15 | args[arg] = args2[arg] 16 | auxs = dict() 17 | for aux in aux_names: 18 | if aux in auxs1: 19 | auxs[aux] = auxs1[aux] 20 | if aux in auxs2: 21 | auxs[aux] = auxs2[aux] 22 | save_checkpoint(prefix_out, epoch_out, args, auxs) 23 | -------------------------------------------------------------------------------- /lib/utils/constant_symbol.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | @mx.init.register 3 | class MyConstant(mx.init.Initializer): 4 | def __init__(self, value): 5 | super(MyConstant, self).__init__(value=value) 6 | self.value = value 7 | 8 | def _init_weight(self, _, arr): 9 | arr[:] = mx.nd.array(self.value) -------------------------------------------------------------------------------- /lib/utils/constant_symbol.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/constant_symbol.pyc -------------------------------------------------------------------------------- /lib/utils/create_logger.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Bin Xiao 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | import logging 11 | import time 12 | 13 | def create_logger(root_output_path, cfg, image_set): 14 | # set up logger 15 | if not os.path.exists(root_output_path): 16 | os.makedirs(root_output_path) 17 | assert os.path.exists(root_output_path), '{} does not exist'.format(root_output_path) 18 | 19 | cfg_name = os.path.basename(cfg).split('.')[0] 20 | config_output_path = os.path.join(root_output_path, '{}'.format(cfg_name)) 21 | if not os.path.exists(config_output_path): 22 | 
os.makedirs(config_output_path) 23 | 24 | image_sets = [iset for iset in image_set.split(';')] #JS Joseph Shtok: replaced '+' with ';' for delimiter 25 | final_output_path = os.path.join(config_output_path, '{}'.format('_'.join(image_sets))) 26 | if not os.path.exists(final_output_path): 27 | os.makedirs(final_output_path) 28 | 29 | log_file = '{}_{}.log'.format(cfg_name, time.strftime('%Y-%m-%d-%H-%M')) 30 | head = '%(asctime)-15s %(message)s' 31 | logging.basicConfig(filename=os.path.join(final_output_path, log_file), format=head) 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | 35 | 36 | log_path = log_file[:-4] 37 | 38 | return logger, final_output_path, log_path 39 | 40 | -------------------------------------------------------------------------------- /lib/utils/create_logger.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/create_logger.pyc -------------------------------------------------------------------------------- /lib/utils/image.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/image.pyc -------------------------------------------------------------------------------- /lib/utils/image_processing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def resize(im, target_size, max_size): 6 | """ 7 | only resize input image to target size and return scale 8 | :param im: BGR image input by opencv 9 | :param target_size: one dimensional size (the short side) 10 | :param max_size: one dimensional max size (the long side) 11 | :return: resized image and its scale factor im_scale 12 | """ 13 | im_shape = im.shape 14 | im_size_min = np.min(im_shape[0:2]) 15 | im_size_max = np.max(im_shape[0:2]) 16 | im_scale = float(target_size) / float(im_size_min) 17 | # prevent bigger axis from being more than max_size: 18 | if np.round(im_scale * im_size_max) > max_size: 19 | im_scale = float(max_size) / float(im_size_max) 20 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) 21 | return im, im_scale 22 | 23 | 24 | def transform(im, pixel_means, need_mean=False): 25 | """ 26 | transform into mxnet tensor 27 | subtract pixel means and transform to correct format 28 | :param im: [height, width, channel] in BGR 29 | :param pixel_means: [[[R, G, B pixel means]]] 30 | :return: [batch, channel, height, width] 31 | """ 32 | assert False, "shouldn't reach here."
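# NOTE: the assert above deliberately disables transform(); the body below is unreachable while it is in place and is kept for reference only.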
33 | im = im.copy() 34 | im[:, :, (0, 1, 2)] = im[:, :, (2, 1, 0)] 35 | im = im.astype(float) 36 | if need_mean: 37 | im -= pixel_means 38 | im_tensor = im[np.newaxis, :] 39 | # put channel first 40 | channel_swap = (0, 3, 1, 2) 41 | im_tensor = im_tensor.transpose(channel_swap) 42 | return im_tensor 43 | 44 | 45 | def transform_inverse(im_tensor, pixel_means): 46 | """ 47 | transform from mxnet im_tensor to ordinary RGB image 48 | im_tensor is limited to one image 49 | :param im_tensor: [batch, channel, height, width] 50 | :param pixel_means: [[[R, G, B pixel means]]] 51 | :return: im [height, width, channel(RGB)] 52 | """ 53 | assert im_tensor.shape[0] == 1 54 | im_tensor = im_tensor.copy() 55 | # put channel back 56 | channel_swap = (0, 2, 3, 1) 57 | im_tensor = im_tensor.transpose(channel_swap) 58 | im = im_tensor[0] 59 | assert im.shape[2] == 3 60 | im += pixel_means 61 | im = im.astype(np.uint8) 62 | return im 63 | 64 | 65 | def tensor_vstack(tensor_list, pad=0): 66 | """ 67 | vertically stack tensors 68 | :param tensor_list: list of tensor to be stacked vertically 69 | :param pad: label to pad with 70 | :return: tensor with max shape 71 | """ 72 | ndim = len(tensor_list[0].shape) 73 | if ndim == 1: 74 | return np.hstack(tensor_list) 75 | dimensions = [0] 76 | for dim in range(1, ndim): 77 | dimensions.append(max([tensor.shape[dim] for tensor in tensor_list])) 78 | for ind, tensor in enumerate(tensor_list): 79 | pad_shape = [(0, 0)] 80 | for dim in range(1, ndim): 81 | pad_shape.append((0, dimensions[dim] - tensor.shape[dim])) 82 | tensor_list[ind] = np.lib.pad(tensor, pad_shape, 'constant', constant_values=pad) 83 | all_tensor = np.vstack(tensor_list) 84 | return all_tensor 85 | -------------------------------------------------------------------------------- /lib/utils/load_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataset import * 3 | 4 | 5 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 6 | flip=False, categ_index_offs=0, per_category_epoch_max=0, classes_list_fname='',\ 7 | return_num_classes=False, return_imdb=False,num_ex_per_class=0): 8 | """ load ground truth roidb """ 9 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path, categ_index_offs=categ_index_offs,\ 10 | per_category_epoch_max=per_category_epoch_max,classes_list_fname=classes_list_fname,num_ex_per_class=num_ex_per_class) 11 | roidb = imdb.gt_roidb() 12 | if flip: 13 | roidb = imdb.append_flipped_images(roidb) 14 | if return_num_classes: 15 | if return_imdb: 16 | return roidb, imdb.num_classes, imdb 17 | else: 18 | return roidb, imdb.num_classes 19 | else: 20 | return roidb 21 | 22 | 23 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 24 | proposal='rpn', append_gt=True, flip=False): 25 | """ load proposal roidb (append_gt when training) """ 26 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 27 | 28 | gt_roidb = imdb.gt_roidb() 29 | roidb = eval('imdb.' 
+ proposal + '_roidb')(gt_roidb, append_gt) 30 | if flip: 31 | roidb = imdb.append_flipped_images(roidb) 32 | return roidb 33 | 34 | 35 | def merge_roidb(roidbs): 36 | """ roidbs are lists, concatenate them together """ 37 | roidb = roidbs[0] 38 | for r in roidbs[1:]: 39 | roidb.extend(r) 40 | return roidb 41 | 42 | 43 | def filter_roidb(roidb, config): 44 | """ remove roidb entries without usable rois """ 45 | 46 | def is_valid(entry): 47 | """ valid images have at least 1 fg or bg roi """ 48 | overlaps = entry['max_overlaps'] 49 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 50 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 51 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 52 | return valid 53 | 54 | num = len(roidb) 55 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 56 | num_after = len(filtered_roidb) 57 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 58 | 59 | return filtered_roidb 60 | 61 | 62 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 63 | flip=False): 64 | """ load ground truth segdb """ 65 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 66 | segdb = imdb.gt_segdb() 67 | if flip: 68 | segdb = imdb.append_flipped_images_for_segmentation(segdb) 69 | return segdb 70 | 71 | 72 | def merge_segdb(segdbs): 73 | """ segdbs are lists, concatenate them together """ 74 | segdb = segdbs[0] 75 | for r in segdbs[1:]: 76 | segdb.extend(r) 77 | return segdb 78 | -------------------------------------------------------------------------------- /lib/utils/load_data.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/load_data.pyc -------------------------------------------------------------------------------- /lib/utils/load_data_ext.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from dataset import * 3 | 4 | 5 | def load_gt_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 6 | flip=False, categ_index_offs=0, per_category_epoch_max=0, database_csv='',classes_list_fname='',\ 7 | return_num_classes=False, return_imdb=False,num_ex_per_class=0, force_reload = False): 8 | """ load ground truth roidb """ 9 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path, categ_index_offs=categ_index_offs,\ 10 | per_category_epoch_max=per_category_epoch_max,database_csv=database_csv,classes_list_fname=classes_list_fname,num_ex_per_class=num_ex_per_class) 11 | roidb = imdb.gt_roidb(force_reload) 12 | if flip: 13 | roidb = imdb.append_flipped_images(roidb) 14 | if return_num_classes: 15 | if return_imdb: 16 | return roidb, imdb.num_classes, imdb 17 | else: 18 | return roidb, imdb.num_classes 19 | else: 20 | return roidb 21 | 22 | 23 | def load_proposal_roidb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 24 | proposal='rpn', append_gt=True, flip=False): 25 | """ load proposal roidb (append_gt when training) """ 26 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 27 | 28 | gt_roidb = imdb.gt_roidb() 29 | roidb = eval('imdb.'
+ proposal + '_roidb')(gt_roidb, append_gt) 30 | if flip: 31 | roidb = imdb.append_flipped_images(roidb) 32 | return roidb 33 | 34 | 35 | def merge_roidb(roidbs): 36 | """ roidbs are lists, concatenate them together """ 37 | roidb = roidbs[0] 38 | for r in roidbs[1:]: 39 | roidb.extend(r) 40 | return roidb 41 | 42 | 43 | def filter_roidb(roidb, config): 44 | """ remove roidb entries without usable rois """ 45 | 46 | def is_valid(entry): 47 | """ valid images have at least 1 fg or bg roi """ 48 | overlaps = entry['max_overlaps'] 49 | fg_inds = np.where(overlaps >= config.TRAIN.FG_THRESH)[0] 50 | bg_inds = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0] 51 | valid = len(fg_inds) > 0 or len(bg_inds) > 0 52 | return valid 53 | 54 | num = len(roidb) 55 | filtered_roidb = [entry for entry in roidb if is_valid(entry)] 56 | num_after = len(filtered_roidb) 57 | print 'filtered %d roidb entries: %d -> %d' % (num - num_after, num, num_after) 58 | 59 | return filtered_roidb 60 | 61 | 62 | def load_gt_segdb(dataset_name, image_set_name, root_path, dataset_path, result_path=None, 63 | flip=False): 64 | """ load ground truth segdb """ 65 | imdb = eval(dataset_name)(image_set_name, root_path, dataset_path, result_path) 66 | segdb = imdb.gt_segdb() 67 | if flip: 68 | segdb = imdb.append_flipped_images_for_segmentation(segdb) 69 | return segdb 70 | 71 | 72 | def merge_segdb(segdbs): 73 | """ segdbs are lists, concatenate them together """ 74 | segdb = segdbs[0] 75 | for r in segdbs[1:]: 76 | segdb.extend(r) 77 | return segdb 78 | -------------------------------------------------------------------------------- /lib/utils/load_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def load_checkpoint(prefix, epoch): 5 | """ 6 | Load model checkpoint from file. 7 | :param prefix: Prefix of model name. 8 | :param epoch: Epoch number of model we would like to load. 9 | :return: (arg_params, aux_params) 10 | arg_params : dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's weights. 12 | aux_params : dict of str to NDArray 13 | Model parameter, dict of name to NDArray of net's auxiliary states. 14 | """ 15 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 16 | arg_params = {} 17 | aux_params = {} 18 | for k, v in save_dict.items(): 19 | tp, name = k.split(':', 1) 20 | if tp == 'arg': 21 | arg_params[name] = v 22 | if tp == 'aux': 23 | aux_params[name] = v 24 | return arg_params, aux_params 25 | 26 | 27 | def convert_context(params, ctx): 28 | """ 29 | :param params: dict of str to NDArray 30 | :param ctx: the context to convert to 31 | :return: dict of str of NDArray with context ctx 32 | """ 33 | new_params = dict() 34 | for k, v in params.items(): 35 | new_params[k] = v.as_in_context(ctx) 36 | return new_params 37 | 38 | 39 | def load_param(prefix, epoch, convert=False, ctx=None, process=False): 40 | """ 41 | wrapper for load checkpoint 42 | :param prefix: Prefix of model name. 43 | :param epoch: Epoch number of model we would like to load. 44 | :param convert: if True, convert the loaded parameters to NDArrays on ctx 45 | :param ctx: context to convert to when convert is True (defaults to mx.cpu())
46 | :param process: if True, strip the '_test' suffix from parameter names 47 | :return: (arg_params, aux_params) 48 | """ 49 | arg_params, aux_params = load_checkpoint(prefix, epoch) 50 | if convert: 51 | if ctx is None: 52 | ctx = mx.cpu() 53 | arg_params = convert_context(arg_params, ctx) 54 | aux_params = convert_context(aux_params, ctx) 55 | if process: 56 | tests = [k for k in arg_params.keys() if '_test' in k] 57 | for test in tests: 58 | arg_params[test.replace('_test', '')] = arg_params.pop(test) 59 | return arg_params, aux_params 60 | -------------------------------------------------------------------------------- /lib/utils/load_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/load_model.pyc -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | 10 | import logging 11 | from mxnet.lr_scheduler import LRScheduler 12 | 13 | class WarmupMultiFactorScheduler(LRScheduler): 14 | """Reduce learning rate by a factor at steps specified in a list 15 | 16 | Assume the weight has been updated n times; then the learning rate will 17 | be 18 | 19 | base_lr * factor^(sum((step/n)<=1)) # step is an array 20 | 21 | Parameters 22 | ---------- 23 | step: list of int 24 | schedule learning rate after n updates 25 | factor: float 26 | the factor for reducing the learning rate 27 | """ 28 | def __init__(self, step, factor=1, warmup=False, warmup_lr=0, warmup_step=0): 29 | super(WarmupMultiFactorScheduler, self).__init__() 30 | assert isinstance(step, list) and len(step) >= 1 31 | for i, _step in enumerate(step): 32 | if i != 0 and step[i] <= step[i-1]: 33 | raise ValueError("Schedule step must be an increasing integer list") 34 | if _step < 1: 35 | raise ValueError("Schedule step must be greater than or equal to 1") 36 | if factor > 1.0: 37 | raise ValueError("Factor must be no more than 1 to make lr reduce") 38 | self.step = step 39 | self.cur_step_ind = 0 40 | self.factor = factor 41 | self.count = 0 42 | self.warmup = warmup 43 | self.warmup_lr = warmup_lr 44 | self.warmup_step = warmup_step 45 | 46 | def __call__(self, num_update): 47 | """ 48 | Call to schedule current learning rate 49 | 50 | Parameters 51 | ---------- 52 | num_update: int 53 | the maximal number of updates applied to a weight.
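Example (editor's illustration with made-up values; base_lr is normally set by the optimizer that owns the scheduler):

    sched = WarmupMultiFactorScheduler(step=[100, 200], factor=0.1,
                                       warmup=True, warmup_lr=1e-4, warmup_step=50)
    sched.base_lr = 0.001
    sched(10)    # -> 1e-4 (still inside the warmup window)
    sched(150)   # -> 1e-4 (0.001 * 0.1, first step passed)
    sched(250)   # -> 1e-5 (0.001 * 0.01, both steps passed)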
54 | """ 55 | 56 | # NOTE: use while rather than if (for continuing training via load_epoch) 57 | if self.warmup and num_update < self.warmup_step: 58 | return self.warmup_lr 59 | while self.cur_step_ind <= len(self.step)-1: 60 | if num_update > self.step[self.cur_step_ind]: 61 | self.count = self.step[self.cur_step_ind] 62 | self.cur_step_ind += 1 63 | self.base_lr *= self.factor 64 | logging.info("Update[%d]: Change learning rate to %0.5e", 65 | num_update, self.base_lr) 66 | else: 67 | return self.base_lr 68 | return self.base_lr 69 | -------------------------------------------------------------------------------- /lib/utils/lr_scheduler.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/lr_scheduler.pyc -------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Yi Li 7 | # -------------------------------------------------------- 8 | 9 | from skimage.draw import polygon 10 | import numpy as np 11 | 12 | def segToMask( S, h, w ): 13 | """ 14 | Convert polygon segmentation to binary mask. 15 | :param S (float array) : polygon segmentation mask 16 | :param h (int) : target mask height 17 | :param w (int) : target mask width 18 | :return: M (bool 2D array) : binary mask 19 | """ 20 | M = np.zeros((h,w), dtype=np.bool) 21 | for s in S: 22 | N = len(s) 23 | rr, cc = polygon(np.array(s[1:N:2]).clip(max=h-1), \ 24 | np.array(s[0:N:2]).clip(max=w-1)) # (y, x) 25 | M[rr, cc] = 1 26 | return M 27 | 28 | 29 | def decodeMask(R): 30 | """ 31 | Decode binary mask M encoded via run-length encoding. 
32 | :param R (object RLE) : run-length encoding of binary mask 33 | :return: M (bool 2D array) : decoded binary mask 34 | """ 35 | N = len(R['counts']) 36 | M = np.zeros( (R['size'][0]*R['size'][1], )) 37 | n = 0 38 | val = 1 39 | for pos in range(N): 40 | val = not val 41 | for c in range(R['counts'][pos]): 42 | 43 | M[n] = val 44 | n += 1 45 | return M.reshape((R['size']), order='F') 46 | 47 | def mask_coco2voc(coco_masks, im_height, im_width): 48 | voc_masks = np.zeros((len(coco_masks), im_height, im_width)) 49 | for i, ann in enumerate(coco_masks): 50 | if type(ann) == list: 51 | # polygon 52 | m = segToMask(ann, im_height, im_width) 53 | else: 54 | # rle 55 | m = decodeMask(ann) 56 | voc_masks[i, :, :] = m 57 | return voc_masks 58 | -------------------------------------------------------------------------------- /lib/utils/mask_coco2voc.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/mask_coco2voc.pyc -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Yi Li 7 | # -------------------------------------------------------- 8 | 9 | from skimage.draw import polygon 10 | import numpy as np 11 | import cv2 12 | from utils.tictoc import tic, toc 13 | from dataset.pycocotools.mask import encode as encodeMask_c 14 | 15 | def encodeMask(M): 16 | """ 17 | Encode binary mask M using run-length encoding. 18 | :param M (bool 2D array) : binary mask to encode 19 | :return: R (object RLE) : run-length encoding of binary mask 20 | """ 21 | [h, w] = M.shape 22 | M = M.flatten(order='F') 23 | N = len(M) 24 | counts_list = [] 25 | pos = 0 26 | # counts 27 | counts_list.append(1) 28 | diffs = np.logical_xor(M[0:N - 1], M[1:N]) 29 | for diff in diffs: 30 | if diff: 31 | pos += 1 32 | counts_list.append(1) 33 | else: 34 | counts_list[pos] += 1 35 | # if array starts from 1. 
start with 0 counts for 0 36 | if M[0] == 1: 37 | counts_list = [0] + counts_list 38 | return {'size': [h, w], 39 | 'counts': counts_list, 40 | } 41 | 42 | def mask_voc2coco(voc_masks, voc_boxes, im_height, im_width, binary_thresh = 0.4): 43 | num_pred = len(voc_masks) 44 | assert(num_pred==voc_boxes.shape[0]) 45 | mask_img = np.zeros((im_height, im_width, num_pred), dtype=np.uint8, order='F') 46 | for i in xrange(num_pred): 47 | pred_box = np.round(voc_boxes[i, :4]).astype(int) 48 | pred_mask = voc_masks[i] 49 | pred_mask = cv2.resize(pred_mask.astype(np.float32), (pred_box[2] - pred_box[0] + 1, pred_box[3] - pred_box[1] + 1)) 50 | mask_img[pred_box[1]:pred_box[3]+1, pred_box[0]:pred_box[2]+1, i] = pred_mask >= binary_thresh 51 | coco_mask = encodeMask_c(mask_img) 52 | return coco_mask 53 | -------------------------------------------------------------------------------- /lib/utils/mask_voc2coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/mask_voc2coco.pyc -------------------------------------------------------------------------------- /lib/utils/miscellaneous.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/miscellaneous.pyc -------------------------------------------------------------------------------- /lib/utils/roidb.py: -------------------------------------------------------------------------------- 1 | """ 2 | roidb 3 | basic format [image_index]['boxes', 'gt_classes', 'gt_overlaps', 'flipped'] 4 | extended ['image', 'max_classes', 'max_overlaps', 'bbox_targets'] 5 | """ 6 | 7 | import cv2 8 | import numpy as np 9 | 10 | from bbox.bbox_regression import compute_bbox_regression_targets 11 | 12 | 13 | def prepare_roidb(imdb, roidb, cfg): 14 | """ 15 | add image path, max_classes, max_overlaps to roidb 16 | :param imdb: image database, provide path 17 | :param roidb: roidb 18 | :return: None 19 | """ 20 | print 'prepare roidb' 21 | for i in range(len(roidb)): # image_index 22 | roidb[i]['image'] = imdb.image_path_from_index(imdb.image_set_index[i]) 23 | if cfg.TRAIN.ASPECT_GROUPING: 24 | size = cv2.imread(roidb[i]['image']).shape 25 | roidb[i]['height'] = size[0] 26 | roidb[i]['width'] = size[1] 27 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 28 | max_overlaps = gt_overlaps.max(axis=1) 29 | max_classes = gt_overlaps.argmax(axis=1) 30 | roidb[i]['max_overlaps'] = max_overlaps 31 | roidb[i]['max_classes'] = max_classes 32 | 33 | # background roi => background class 34 | zero_indexes = np.where(max_overlaps == 0)[0] 35 | assert all(max_classes[zero_indexes] == 0) 36 | # foreground roi => foreground class 37 | nonzero_indexes = np.where(max_overlaps > 0)[0] 38 | assert all(max_classes[nonzero_indexes] != 0) 39 | -------------------------------------------------------------------------------- /lib/utils/save_model.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | 3 | 4 | def save_checkpoint(prefix, epoch, arg_params, aux_params): 5 | """Checkpoint the model data into file. 6 | :param prefix: Prefix of model name. 7 | :param epoch: The epoch number of the model. 8 | :param arg_params: dict of str to NDArray 9 | Model parameter, dict of name to NDArray of net's weights. 
10 | :param aux_params: dict of str to NDArray 11 | Model parameter, dict of name to NDArray of net's auxiliary states. 12 | :return: None 13 | prefix-epoch.params will be saved for parameters. 14 | """ 15 | save_dict = {('arg:%s' % k) : v for k, v in arg_params.items()} 16 | save_dict.update({('aux:%s' % k) : v for k, v in aux_params.items()}) 17 | param_name = '%s-%04d.params' % (prefix, epoch) 18 | mx.nd.save(param_name, save_dict) 19 | -------------------------------------------------------------------------------- /lib/utils/show_boxes.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/show_boxes.pyc -------------------------------------------------------------------------------- /lib/utils/show_masks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import random 4 | import cv2 5 | 6 | def show_masks(im, dets, msks, show = True, thresh = 1e-3, scale = 1.0): 7 | plt.cla() 8 | plt.imshow(im) 9 | for det, msk in zip(dets, msks): 10 | color = (random.random(), random.random(), random.random()) # generate a random color 11 | bbox = det[:4] * scale 12 | cod = np.zeros(4).astype(int) 13 | cod[0] = int(bbox[0]) 14 | cod[1] = int(bbox[1]) 15 | cod[2] = int(bbox[2]) 16 | cod[3] = int(bbox[3]) 17 | if im[cod[1]:cod[3], cod[0]:cod[2], 0].size > 0: 18 | msk = cv2.resize(msk, im[cod[1]:cod[3], cod[0]:cod[2], 0].T.shape) 19 | bimsk = msk > thresh 20 | bimsk = bimsk.astype(int) 21 | bimsk = np.repeat(bimsk[:, :, np.newaxis], 3, axis=2) 22 | mskd = im[cod[1]:cod[3], cod[0]:cod[2], :] * bimsk 23 | clmsk = np.ones(bimsk.shape) * bimsk 24 | clmsk[:, :, 0] = clmsk[:, :, 0] * color[0] * 256 25 | clmsk[:, :, 1] = clmsk[:, :, 1] * color[1] * 256 26 | clmsk[:, :, 2] = clmsk[:, :, 2] * color[2] * 256 27 | im[cod[1]:cod[3], cod[0]:cod[2], :] = im[cod[1]:cod[3], cod[0]:cod[2], :] + 0.8 * clmsk - 0.8 * mskd 28 | plt.imshow(im) 29 | if show: 30 | plt.show() 31 | return im 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/show_offset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Guodong Zhang 7 | # -------------------------------------------------------- 8 | 9 | import matplotlib.pyplot as plt 10 | import numpy as np 11 | 12 | def show_boxes_simple(bbox, color='r', lw=2): 13 | rect = plt.Rectangle((bbox[0], bbox[1]), bbox[2] - bbox[0], 14 | bbox[3] - bbox[1], fill=False, edgecolor=color, linewidth=lw) 15 | plt.gca().add_patch(rect) 16 | 17 | def kernel_inv_map(vis_attr, target_point, map_h, map_w): 18 | pos_shift = [vis_attr['dilation'] * 0 - vis_attr['pad'], 19 | vis_attr['dilation'] * 1 - vis_attr['pad'], 20 | vis_attr['dilation'] * 2 - vis_attr['pad']] 21 | source_point = [] 22 | for idx in range(vis_attr['filter_size']**2): 23 | cur_source_point = np.array([target_point[0] + pos_shift[idx / 3], 24 | target_point[1] + pos_shift[idx % 3]]) 25 | if cur_source_point[0] < 0 or cur_source_point[1] < 0 \ 26 | or cur_source_point[0] > map_h - 1 or cur_source_point[1] > map_w - 1: 27 | continue 28 | 
source_point.append(cur_source_point.astype('f')) 29 | return source_point 30 | 31 | def offset_inv_map(source_points, offset): 32 | for idx, _ in enumerate(source_points): 33 | source_points[idx][0] += offset[2*idx] 34 | source_points[idx][1] += offset[2*idx + 1] 35 | return source_points 36 | 37 | def get_bottom_position(vis_attr, top_points, all_offset): 38 | map_h = all_offset[0].shape[2] 39 | map_w = all_offset[0].shape[3] 40 | 41 | for level in range(vis_attr['plot_level']): 42 | source_points = [] 43 | for idx, cur_top_point in enumerate(top_points): 44 | cur_top_point = np.round(cur_top_point) 45 | if cur_top_point[0] < 0 or cur_top_point[1] < 0 \ 46 | or cur_top_point[0] > map_h-1 or cur_top_point[1] > map_w-1: 47 | continue 48 | cur_source_point = kernel_inv_map(vis_attr, cur_top_point, map_h, map_w) 49 | cur_offset = np.squeeze(all_offset[level][:, :, int(cur_top_point[0]), int(cur_top_point[1])]) 50 | cur_source_point = offset_inv_map(cur_source_point, cur_offset) 51 | source_points = source_points + cur_source_point 52 | top_points = source_points 53 | return source_points 54 | 55 | def plot_according_to_point(vis_attr, im, source_points, map_h, map_w, color=[255,0,0]): 56 | plot_area = vis_attr['plot_area'] 57 | for idx, cur_source_point in enumerate(source_points): 58 | y = np.round((cur_source_point[0] + 0.5) * im.shape[0] / map_h).astype('i') 59 | x = np.round((cur_source_point[1] + 0.5) * im.shape[1] / map_w).astype('i') 60 | 61 | if x < 0 or y < 0 or x > im.shape[1]-1 or y > im.shape[0]-1: 62 | continue 63 | y = min(y, im.shape[0] - vis_attr['plot_area'] - 1) 64 | x = min(x, im.shape[1] - vis_attr['plot_area'] - 1) 65 | y = max(y, vis_attr['plot_area']) 66 | x = max(x, vis_attr['plot_area']) 67 | im[y-plot_area:y+plot_area+1, x-plot_area:x+plot_area+1, :] = np.tile( 68 | np.reshape(color, (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1) 69 | ) 70 | return im 71 | 72 | 73 | 74 | def show_dpsroi_offset(im, boxes, offset, classes, trans_std=0.1): 75 | plt.cla() 76 | for idx, bbox in enumerate(boxes): 77 | plt.figure(idx+1) 78 | plt.axis("off") 79 | plt.imshow(im) 80 | 81 | offset_w = np.squeeze(offset[idx, classes[idx]*2, :, :]) * trans_std 82 | offset_h = np.squeeze(offset[idx, classes[idx]*2+1, :, :]) * trans_std 83 | x1 = int(bbox[0]) 84 | y1 = int(bbox[1]) 85 | x2 = int(bbox[2]) 86 | y2 = int(bbox[3]) 87 | roi_width = x2-x1+1 88 | roi_height = y2-y1+1 89 | part_size = offset_w.shape[0] 90 | bin_size_w = roi_width / part_size 91 | bin_size_h = roi_height / part_size 92 | show_boxes_simple(bbox, color='b') 93 | for ih in range(part_size): 94 | for iw in range(part_size): 95 | sub_box = np.array([x1+iw*bin_size_w, y1+ih*bin_size_h, 96 | x1+(iw+1)*bin_size_w, y1+(ih+1)*bin_size_h]) 97 | sub_offset = offset_h[ih, iw] * np.array([0, 1, 0, 1]) * roi_height \ 98 | + offset_w[ih, iw] * np.array([1, 0, 1, 0]) * roi_width 99 | sub_box = sub_box + sub_offset 100 | show_boxes_simple(sub_box) 101 | plt.show() 102 | 103 | def show_dconv_offset(im, all_offset, step=[2, 2], filter_size=3, 104 | dilation=2, pad=2, plot_area=2, plot_level=3): 105 | vis_attr = {'filter_size': filter_size, 'dilation': dilation, 'pad': pad, 106 | 'plot_area': plot_area, 'plot_level': plot_level} 107 | 108 | map_h = all_offset[0].shape[2] 109 | map_w = all_offset[0].shape[3] 110 | 111 | step_h = step[0] 112 | step_w = step[1] 113 | start_h = int(np.round(step_h / 2)) 114 | start_w = int(np.round(step_w / 2)) 115 | 116 | plt.figure() 117 | for im_h in range(start_h, map_h, step_h): 118 | for im_w in range(start_w, map_w, 
step_w): 119 | target_point = np.array([im_h, im_w]) 120 | source_y = int(np.round(target_point[0] * im.shape[0] / map_h)) 121 | source_x = int(np.round(target_point[1] * im.shape[1] / map_w)) 122 | if source_y < plot_area or source_x < plot_area \ 123 | or source_y >= im.shape[0] - plot_area or source_x >= im.shape[1] - plot_area: 124 | continue 125 | 126 | cur_im = np.copy(im) 127 | source_points = get_bottom_position(vis_attr, [target_point], all_offset) 128 | cur_im = plot_according_to_point(vis_attr, cur_im, source_points, map_h, map_w) 129 | cur_im[source_y-plot_area:source_y+plot_area+1, source_x-plot_area:source_x+plot_area+1, :] = \ 130 | np.tile(np.reshape([0, 255, 0], (1, 1, 3)), (2*plot_area+1, 2*plot_area+1, 1)) 131 | 132 | 133 | plt.axis("off") 134 | plt.imshow(cur_im) 135 | plt.show(block=False) 136 | plt.pause(0.01) 137 | plt.clf() 138 | -------------------------------------------------------------------------------- /lib/utils/symbol.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Deformable Convolutional Networks 3 | # Copyright (c) 2017 Microsoft 4 | # Copyright (c) 2019 IBM Corp 5 | # Licensed under The Apache-2.0 License [see LICENSE for details] 6 | # Written by Yuwen Xiong 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | class Symbol: 11 | def __init__(self): 12 | self.arg_shape_dict = None 13 | self.out_shape_dict = None 14 | self.aux_shape_dict = None 15 | self.sym = None 16 | 17 | @property 18 | def symbol(self): 19 | return self.sym 20 | 21 | def get_symbol(self, cfg, is_train=True): 22 | """ 23 | return a generated symbol; it also needs to be assigned to self.sym 24 | """ 25 | raise NotImplementedError() 26 | 27 | def init_weights(self, cfg, arg_params, aux_params): 28 | raise NotImplementedError() 29 | 30 | def get_msra_std(self, shape): 31 | fan_in = float(shape[1]) 32 | if len(shape) > 2: 33 | fan_in *= np.prod(shape[2:]) 34 | print(np.sqrt(2 / fan_in)) 35 | return np.sqrt(2 / fan_in) 36 | 37 | def infer_shape(self, data_shape_dict): 38 | # infer shape 39 | arg_shape, out_shape, aux_shape = self.sym.infer_shape(**data_shape_dict) 40 | self.arg_shape_dict = dict(zip(self.sym.list_arguments(), arg_shape)) 41 | self.out_shape_dict = dict(zip(self.sym.list_outputs(), out_shape)) 42 | self.aux_shape_dict = dict(zip(self.sym.list_auxiliary_states(), aux_shape)) 43 | 44 | def check_parameter_shapes(self, arg_params, aux_params, data_shape_dict, is_train=True): 45 | for k in self.sym.list_arguments(): 46 | if k in data_shape_dict or (False if is_train else 'label' in k) or ('const_eq_' in k) or ('cls_reps' in k): 47 | continue 48 | assert k in arg_params, k + ' not initialized' 49 | assert arg_params[k].shape == self.arg_shape_dict[k], \ 50 | 'shape inconsistent for ' + k + ' inferred ' + str(self.arg_shape_dict[k]) + ' provided ' + str( 51 | arg_params[k].shape) 52 | for k in self.sym.list_auxiliary_states(): 53 | assert k in aux_params, k + ' not initialized' 54 | assert aux_params[k].shape == self.aux_shape_dict[k], \ 55 | 'shape inconsistent for ' + k + ' inferred ' + str(self.aux_shape_dict[k]) + ' provided ' + str( 56 | aux_params[k].shape) 57 | -------------------------------------------------------------------------------- /lib/utils/symbol.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/symbol.pyc -------------------------------------------------------------------------------- /lib/utils/tfs_vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cPickle 4 | from sklearn.manifold import TSNE 5 | 6 | def get_reps_proj(reps,samples,pars): 7 | indices_reps = [] 8 | indices_samples = [] 9 | X = np.zeros((pars.Edim, 0)) 10 | for idx in range(pars.Nclasses): 11 | reps_c = reps[:, :, idx] 12 | samples_c = samples[idx] 13 | endp = X.shape[1] 14 | X = np.concatenate((X, reps_c, samples_c), axis=1) 15 | indices_reps += [range(endp, endp + reps_c.shape[1])] 16 | indices_samples += [range(endp + reps_c.shape[1], endp + reps_c.shape[1] + samples_c.shape[1])] 17 | X_embedded = TSNE(n_components=2, random_state=0, n_iter=500).fit_transform(X.transpose()) 18 | 19 | return X_embedded, indices_reps, indices_samples 20 | 21 | def get_bg_reps_proj(bg_reps,bg_samples,pars): 22 | Nbg_reps = bg_reps.shape[1] 23 | X = np.concatenate((bg_reps, bg_samples), axis=1) 24 | indices_reps = range(Nbg_reps) 25 | indices_samples = range(Nbg_reps, X.shape[1]) 26 | X_embedded = TSNE(n_components=2, random_state=0, n_iter=500).fit_transform(X.transpose()) 27 | return X_embedded, indices_reps, indices_samples 28 | 29 | def normalize_reps(reps): 30 | reps_norm = np.sqrt(np.sum(np.square(reps), axis=0)) 31 | if reps.ndim == 3: 32 | for i in range(reps.shape[1]): 33 | for j in range(reps.shape[2]): 34 | reps[:, i, j] = reps[:, i, j] / reps_norm[i, j] 35 | if reps.ndim == 2: 36 | for i in range(reps.shape[1]): 37 | reps[:, i] = reps[:, i] / reps_norm[i] 38 | return reps 39 | 40 | def vis_reps_TSNE(samples,reps,bg_samples,bg_reps,pars): 41 | # inputs: 42 | # par: Edim, REP_L2_NORM, Nreps, Nbg_reps, Nclasses, dpi_value = 1200, pars.GroupSize = 8 43 | # samples, reps1, reps2, bg_reps1, bg_reps2. # reps=[Edim, Nreps, Nclasses] bg_reps = [Edim, Nbg_reps], samples = [ [Edim, N] ...] in a list of length Nclasses corresp. 
to order of reps 44 | 45 | import matplotlib.pyplot as plt 46 | import matplotlib.colors as colors 47 | import matplotlib.cm as cmx 48 | 49 | if pars.REP_L2_NORM: 50 | reps = normalize_reps(reps) 51 | if pars.do_BG: 52 | bg_reps = normalize_reps(bg_reps) 53 | 54 | # print the reps ======================================================================================= 55 | 56 | X_embedded, indices_reps, indices_samples = get_reps_proj(reps,samples,pars) 57 | 58 | # with open(pars.X_embedded_fname, 'wb') as fid: 59 | # cPickle.dump({'X_embedded': X_embedded, 'indices_reps': indices_reps}, fid, protocol=cPickle.HIGHEST_PROTOCOL) 60 | # with open('/dccstor/jsdata1/dev/RepMet/vis_data.pkl','wb') as fid: 61 | # cPickle.dump({'reps':reps,'reps_mat':reps_mat,'bg_reps_mat':bg_reps_mat,'bg_reps':bg_reps,'samples':samples,'bg_embeds':bg_embeds},fid,protocol=cPickle.HIGHEST_PROTOCOL) 62 | 63 | Ngroups = int(np.ceil(pars.Nclasses / pars.GroupSize)) 64 | jet = cm = plt.get_cmap('jet') 65 | cNorm = colors.Normalize(vmin=0, vmax=pars.GroupSize) 66 | scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet) 67 | for gn in range(Ngroups): 68 | save_fname = os.path.join(pars.vis_reps_fname_pref + '{0}.jpg'.format(gn)) 69 | fig, ax = plt.subplots(1, 1) 70 | # for idx in range(pars.Nclasses): 71 | for idx in range(gn * pars.GroupSize, (gn + 1) * pars.GroupSize): 72 | if idx >= pars.Nclasses: 73 | break 74 | colorVal = scalarMap.to_rgba(idx - gn * pars.GroupSize) 75 | ax.scatter(X_embedded[indices_reps[idx], 0], X_embedded[indices_reps[idx], 1], marker='x', s=7, color=colorVal) 76 | ax.scatter(X_embedded[indices_samples[idx], 0], X_embedded[indices_samples[idx], 1], color=colorVal, s=0.15) 77 | ax.axis('off') 78 | fig.savefig(save_fname, dpi=pars.dpi_value) 79 | plt.close(fig) 80 | 81 | cNorm = colors.Normalize(vmin=0, vmax=pars.Nclasses) 82 | scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet) 83 | save_fname = pars.vis_reps_fname_pref+'_all.jpg' 84 | fig, ax = plt.subplots(1, 1) 85 | for idx in range(pars.Nclasses): 86 | colorVal = scalarMap.to_rgba(idx) 87 | ax.scatter(X_embedded[indices_reps[idx], 0], X_embedded[indices_reps[idx], 1], marker='x', s=7, color=colorVal) 88 | ax.scatter(X_embedded[indices_samples[idx], 0], X_embedded[indices_samples[idx], 1], color=colorVal, s=0.15) 89 | ax.axis('off') 90 | 91 | fig.savefig(save_fname, dpi=pars.dpi_value) 92 | plt.close(fig) 93 | # print the bg_reps ======================================================================================= 94 | if pars.do_BG: 95 | X_embedded, indices_reps, indices_samples = get_bg_reps_proj(bg_reps,bg_samples,pars) 96 | save_fname = pars.vis_bg_reps_fname 97 | fig, ax = plt.subplots(1, 1) 98 | ax.scatter(X_embedded[indices_samples, 0], X_embedded[indices_samples, 1], color=[0, 1, 0], s=0.15) 99 | ax.scatter(X_embedded[indices_reps, 0], X_embedded[indices_reps, 1], marker='x', s=7, color=[1, 0, 0]) 100 | ax.axis('off') 101 | fig.savefig(save_fname, dpi=pars.dpi_value) 102 | plt.close(fig) 103 | -------------------------------------------------------------------------------- /lib/utils/tfs_vis.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/tfs_vis.pyc -------------------------------------------------------------------------------- /lib/utils/tictoc.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def tic(): 4 | import time 5 | global 
startTime_for_tictoc 6 | startTime_for_tictoc = time.time() 7 | return startTime_for_tictoc 8 | 9 | def toc(): 10 | if 'startTime_for_tictoc' in globals(): 11 | endTime = time.time() 12 | return endTime - startTime_for_tictoc 13 | else: 14 | return None -------------------------------------------------------------------------------- /lib/utils/tictoc.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/lib/utils/tictoc.pyc -------------------------------------------------------------------------------- /output/benchmarks/RepMet_inloc_10shot_5way_10qpc_500epi_episodes.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/output/benchmarks/RepMet_inloc_10shot_5way_10qpc_500epi_episodes.npz -------------------------------------------------------------------------------- /output/benchmarks/RepMet_inloc_5shot_5way_10qpc_500epi_episodes.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yang-yk/NP-RepMet/d0e09ab6ff5eef8d79e70a9ecc8804dfcc411fc1/output/benchmarks/RepMet_inloc_5shot_5way_10qpc_500epi_episodes.npz --------------------------------------------------------------------------------
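The two benchmark files above are NumPy .npz archives which, judging by their names, hold the pre-sampled few-shot benchmark episodes. A minimal inspection sketch (editor's illustration; the key names inside the archives are not documented here, so they are printed rather than assumed):

    import numpy as np

    # path taken from the listing above; allow_pickle covers object arrays
    episodes = np.load('output/benchmarks/RepMet_inloc_5shot_5way_10qpc_500epi_episodes.npz',
                       allow_pickle=True)
    print(episodes.files)  # list the stored keys before relying on any of them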