├── .gitignore ├── LICENSE ├── README.md ├── _init_paths.py ├── cfgs ├── res101_ms.yml └── res50.yml ├── data ├── README.md └── VOCsplits │ ├── VOC2007 │ ├── train_first_split.txt │ ├── train_second_split.txt │ ├── train_third_split.txt │ ├── voc_2007_train_first_split.txt │ ├── voc_2007_train_second_split.txt │ └── voc_2007_train_third_split.txt │ └── VOC2012 │ ├── train_first_split.txt │ ├── train_second_split.txt │ ├── train_third_split.txt │ ├── voc_2012_train_first_split.txt │ ├── voc_2012_train_second_split.txt │ └── voc_2012_train_third_split.txt ├── download_models.sh ├── img └── PipelineDet.png ├── lib ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── custom.py │ ├── custom_eval.py │ ├── custom_metadata.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── metadata.py │ ├── metadata_3d.py │ ├── metadata_TFA.py │ ├── metadata_coco.py │ ├── objectnet3d.py │ ├── pascal3d.py │ ├── pascal3d_eval.py │ ├── pascal_voc.py │ ├── pascal_voc_rbg.py │ ├── tools │ │ ├── compute_prior.py │ │ └── mcg_munge.py │ └── voc_eval.py ├── make.sh ├── model │ ├── __init__.py │ ├── faster_rcnn │ │ ├── __init__.py │ │ ├── faster_rcnn.py │ │ ├── resnet.py │ │ ├── trail.py │ │ └── vgg16.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── nms_cpu.py │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ ├── nms_cuda_kernel.cu.o │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ ├── roi_align_kernel.cu.o │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ ├── roi_crop_cuda_kernel.cu.o │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.cu.o │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_layer_region.py │ │ ├── proposal_target_layer_cascade.py │ │ ├── proposal_target_layer_cascade_region.py │ │ ├── rpn.py │ │ └── rpn_region.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── config.py │ │ └── net_utils.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── 
_mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── minibatch.py │ ├── roibatchLoader.py │ └── roidb.py └── setup.py ├── run ├── finetune_coco.sh ├── finetune_coco_TFA.sh ├── finetune_voc_first.sh ├── finetune_voc_second.sh ├── finetune_voc_third.sh ├── test_coco.sh ├── test_voc_first.sh ├── test_voc_second.sh ├── test_voc_third.sh ├── train_coco.sh ├── train_voc_first.sh ├── train_voc_second.sh └── train_voc_third.sh ├── spec-file.txt ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | pip-wheel-metadata/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 94 | __pypackages__/ 95 | 96 | # Celery stuff 97 | celerybeat-schedule 98 | celerybeat.pid 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | .idea/ 131 | results/ 132 | checkpoint/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | import os 4 | 5 | if os.path.exists('data/cache/') and os.listdir('data/cache/'): 6 | os.system('rm data/cache/*') 7 | 8 | def add_path(path): 9 | if path not in sys.path: 10 | sys.path.insert(0, path) 11 | 12 | this_dir = osp.dirname(__file__) 13 | 14 | # Add lib to PYTHONPATH 15 | lib_path = osp.join(this_dir, 'lib') 16 | add_path(lib_path) 17 | 18 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI') 19 | add_path(coco_path) 20 | 21 | vg_path = osp.join(this_dir, 'data', 'vgapi') 22 | add_path(vg_path) 23 | -------------------------------------------------------------------------------- /cfgs/res101_ms.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | MAX_SIZE: 1000 13 | SCALES: [600] 14 | DOUBLE_BIAS: False 15 | RCNN_BBOX_WEIGHT: 1 16 | TEST: 17 | SCALES: [600] 18 | HAS_RPN: True 19 | POOLING_SIZE: 7 20 | POOLING_MODE: align 21 | CROP_RESIZE_WITH_MAX_POOL: False 22 | -------------------------------------------------------------------------------- /cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | MAX_SIZE: 1000 13 | SCALES: [600] 14 | DOUBLE_BIAS: False 15 | RCNN_BBOX_WEIGHT: 1 16 | TEST: 17 | SCALES: [600] 18 | HAS_RPN: True 19 | POOLING_SIZE: 7 20 | POOLING_MODE: align 21 | CROP_RESIZE_WITH_MAX_POOL: False 22 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Few-Shot Object Detection (Data Preparation) 2 | 3 | First, go to the data directory: ``cd ./data`` 4 | 5 | ## PASCAL VOC 6 | 7 | Download the data from the official website: 8 | ```bash 9 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 10 | tar xvf VOCtrainval_06-Nov-2007.tar && rm VOCtrainval_06-Nov-2007.tar 11 | 12 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 13 | tar xvf VOCtest_06-Nov-2007.tar && rm VOCtest_06-Nov-2007.tar 14 | 15 | wget host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 16 | tar xvf VOCtrainval_11-May-2012.tar && rm VOCtrainval_11-May-2012.tar 17 | ``` 18 | 19 | The data structure should look like: 20 | ``` 21 | data/VOCdevkit 22 | VOC{2007,2012}/ 23 | Annotations/ 24 | ImageSets/ 25 | JPEGImages/ 26 | ...
27 | ``` 28 | 29 | Move the dataset split files to the correct path: 30 | ```bash 31 | mv VOCsplits/VOC2007/* VOCdevkit/VOC2007/ImageSets/Main/ 32 | mv VOCsplits/VOC2012/* VOCdevkit/VOC2012/ImageSets/Main/ 33 | ``` 34 | 35 | 36 | ## COCO 37 | 38 | Download the data from the official website: 39 | ```bash 40 | mkdir coco 41 | 42 | # download images 43 | cd coco 44 | mkdir images 45 | 46 | wget images.cocodataset.org/zips/train2014.zip 47 | unzip train2014.zip && rm train2014.zip 48 | mv train2014 images/ 49 | 50 | wget images.cocodataset.org/zips/val2014.zip 51 | unzip val2014.zip && rm val2014.zip 52 | mv val2014 images/ 53 | 54 | # download annotations 55 | cd .. 56 | wget images.cocodataset.org/annotations/annotations_trainval2014.zip 57 | unzip annotations_trainval2014.zip && rm annotations_trainval2014.zip 58 | ``` 59 | 60 | The data structure should look like: 61 | ``` 62 | data/coco 63 | annotations/ 64 | images/ 65 | train2014/ 66 | val2014/ 67 | ``` 68 | 69 | 70 | 71 | ## Custom Dataset 72 | 73 | To experiment with your own dataset, first save the dataset (or create a soft link to it) in the folder ```ProjectRootDir/data```. 74 | Moreover, you should have a **csv-format** file containing all the annotations, such as: 75 | ``` 76 | set,im_path,cls,difficult,left,upper,right,lower,height,width 77 | train,ImagePathInDataset,cat,False,x1,y1,x2,y2,H,W 78 | train,ImagePathInDataset,dog,False,x1,y1,x2,y2,H,W 79 | val,ImagePathInDataset,cat,False,x1,y1,x2,y2,H,W 80 | val,ImagePathInDataset,dog,False,x1,y1,x2,y2,H,W 81 | ... 82 | ``` 83 | 84 | Then, look into [factory.py](https://github.com/YoungXIAO13/FewShotDetection/blob/4e8d0c4a0352133113b8438a6a5fa8195661c6c0/lib/datasets/factory.py#L21) to correctly set up the dataset root path and split names (train, val, etc.).\ 85 | Also, set up the novel classes for your custom dataset in [config.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/lib/model/utils/config.py#L27) 86 | 87 | Once this is done, look further into \ 88 | [custom.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom.py) for dataset creation;\ 89 | [custom_metadata.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom_metadata.py) for few-shot class data creation;\ 90 | [custom_eval.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom_eval.py) for evaluation.
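Before wiring the dataset into the code, it can help to sanity-check the annotation file. The snippet below is a minimal sketch (the `data/MyDataset/custom_dataset.txt` path is a placeholder for your own csv): it verifies that the columns expected by `lib/datasets/custom.py` are present and flags boxes that extend beyond the image (the loaders clip such boxes at load time rather than reject them):

```python
import pandas as pd

# placeholder path -- point this at your own annotation csv
df = pd.read_csv("data/MyDataset/custom_dataset.txt")

# columns expected by lib/datasets/custom.py
expected = {"set", "im_path", "cls", "difficult",
            "left", "upper", "right", "lower", "height", "width"}
missing = expected - set(df.columns)
assert not missing, "missing columns: {}".format(missing)

# flag boxes that fall outside the image; custom.py clips these at load time
out_of_bounds = df[(df.left < 0) | (df.upper < 0) |
                   (df.right > df.width - 1) | (df.lower > df.height - 1)]
print("{} annotations will be clipped".format(len(out_of_bounds)))
```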
91 | 92 | Finally, you should set the correct values for the custom dataset in [train.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/train.py#L198) and [test.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/test.py#L129) 93 | 94 | 95 | -------------------------------------------------------------------------------- /download_models.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget https://www.dropbox.com/s/gemt3cxgr9b1aht/FewShotDetectionBaseModels.zip?dl=0 && mv FewShotDetectionBaseModels.zip?dl=0 FewShotDetectionBaseModels.zip 4 | 5 | unzip FewShotDetectionBaseModels.zip && rm FewShotDetectionBaseModels.zip 6 | -------------------------------------------------------------------------------- /img/PipelineDet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/img/PipelineDet.png -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ...
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- -------------------------------------------------------------------------------- /lib/datasets/custom.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import scipy.sparse 4 | import scipy.io as sio 5 | import pickle 6 | import pandas as pd 7 | 8 | import datasets 9 | import datasets.custom 10 | from .custom_eval import custom_eval 11 | from datasets.imdb import imdb 12 | from model.utils.config import cfg 13 | 14 | 15 | class custom(imdb): 16 | def __init__(self, image_set, data_path, csv_file='custom_dataset.txt'): 17 | imdb.__init__(self, 'custom_{}'.format(image_set)) 18 | self._image_set = image_set 19 | self._data_path = data_path 20 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 21 | 22 | df = pd.read_csv(os.path.join(data_path, csv_file)) 23 | 24 | self.df = df[df.set == self._image_set] 25 | 26 | # Add novel classes after base classes [base / novel] 27 | self._classes = tuple(['__background__'] + 28 | [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] + 29 | [c for c in np.unique(df.cls).tolist() if c in cfg.CUSTOM_NOVEL_CLASSES]) 30 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 31 | 32 | # The image index is set to be the unique image path in the dataset 33 | self._image_index = np.unique(self.df.im_path).tolist() 34 | 35 | # Default to roidb handler 36 | self._roidb_handler = self.gt_roidb 37 | self._comp_id = 'comp4' 38 | 39 | # Specific config options 40 | self.config = {'cleanup': False} 41 | 42 | def image_id_at(self, i): 43 | """ 44 | Return the absolute path to image i in the image sequence. 45 | """ 46 | return i 47 | 48 | def image_path_at(self, i): 49 | """ 50 | Return the absolute path to image i in the image sequence. 51 | """ 52 | return self.image_path_from_index(self._image_index[i]) 53 | 54 | def image_path_from_index(self, index): 55 | """ 56 | Construct an image path from the image's "index" identifier. 57 | """ 58 | image_path = os.path.join(self._data_path, index) 59 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 60 | return image_path 61 | 62 | def gt_roidb(self): 63 | """ 64 | Return the database of ground-truth regions of interest. 65 | This function loads/saves from/to a cache file to speed up future calls. 
66 | """ 67 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 68 | if os.path.exists(cache_file): 69 | with open(cache_file, 'rb') as fid: 70 | roidb = pickle.load(fid) 71 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 72 | return roidb 73 | 74 | gt_roidb = [self._load_annotation(index) for index in self.image_index] 75 | with open(cache_file, 'wb') as fid: 76 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 77 | print('wrote gt roidb to {}'.format(cache_file)) 78 | 79 | return gt_roidb 80 | 81 | def _load_annotation(self, index): 82 | """ 83 | Load image and bounding boxes info from txt files of pascal3d. 84 | """ 85 | 86 | objs = self.df[self.df.im_path == index] 87 | num_objs = len(objs) 88 | 89 | # original annotation for object detection 90 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 91 | gt_classes = np.zeros(num_objs, dtype=np.int32) 92 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 93 | 94 | # "Seg" area for pascal is just the box area 95 | seg_areas = np.zeros((num_objs), dtype=np.float32) 96 | ishards = np.zeros((num_objs), dtype=np.int32) 97 | 98 | # Load object annotation into a data frame. 99 | for ix in range(num_objs): 100 | x1 = max(float(objs.iloc[ix]['left']), 0) 101 | y1 = max(float(objs.iloc[ix]['upper']), 0) 102 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 103 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 104 | cls = self._class_to_ind[objs.iloc[ix]['cls']] 105 | boxes[ix, :] = [x1, y1, x2, y2] 106 | gt_classes[ix] = cls 107 | overlaps[ix, cls] = 1.0 108 | 109 | ishards[ix] = objs.iloc[ix]['difficult'] 110 | if cls not in self._classes: 111 | continue 112 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 113 | 114 | overlaps = scipy.sparse.csr_matrix(overlaps) 115 | 116 | return {'boxes': boxes, 117 | 'gt_classes': gt_classes, 118 | 'gt_ishard': ishards, 119 | 'gt_overlaps': overlaps, 120 | 'flipped': False, 121 | 'seg_areas': seg_areas} 122 | 123 | def _get_results_file_template(self): 124 | # data_path/results/_det_test_aeroplane.txt 125 | filename = self._comp_id + '_det_' + self._image_set + '_{:s}.txt' 126 | filedir = os.path.join(self._data_path, 'results') 127 | if not os.path.exists(filedir): 128 | os.makedirs(filedir) 129 | path = os.path.join(filedir, filename) 130 | return path 131 | 132 | def _write_results_file(self, all_boxes): 133 | for cls_ind, cls in enumerate(self.classes): 134 | if cls == '__background__': 135 | continue 136 | print('Writing {} results file'.format(cls)) 137 | filename = self._get_results_file_template().format(cls) 138 | with open(filename, 'wt') as f: 139 | for im_ind, index in enumerate(self.image_index): 140 | dets = all_boxes[cls_ind][im_ind] 141 | if dets == []: 142 | continue 143 | for k in range(dets.shape[0]): 144 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
145 | format(index, dets[k, -1], 146 | dets[k, 0] + 1, dets[k, 1] + 1, 147 | dets[k, 2] + 1, dets[k, 3] + 1)) 148 | 149 | def _do_python_eval(self, output_dir='output'): 150 | cachedir = os.path.join(self._data_path, 'annotations_cache') 151 | if not os.path.isdir(output_dir): 152 | os.mkdir(output_dir) 153 | 154 | aps = [] 155 | for i, cls in enumerate(self._classes): 156 | if cls == '__background__': 157 | continue 158 | filename = self._get_results_file_template().format(cls) 159 | ap = custom_eval(filename, self.df, self._image_set, cls, cachedir, ovthresh=0.5) 160 | print('AP for {} = {:.3f}'.format(cls, ap)) 161 | 162 | aps.append(ap) 163 | # index 0 is '__background__': the last base class sits at index num_classes - len(novel) - 1, the last novel class at num_classes - 1 164 | if i == self.num_classes - len(cfg.CUSTOM_NOVEL_CLASSES) - 1: 165 | print('Mean AP = {:.4f} for base'.format(np.mean(aps))) 166 | if i == self.num_classes - 1: 167 | print('Mean AP = {:.4f} for novel'.format(np.mean(aps[-len(cfg.CUSTOM_NOVEL_CLASSES):]))) 168 | 169 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 170 | return np.mean(aps) 171 | 172 | def evaluate_detections(self, all_boxes, output_dir, **kwargs): 173 | self._write_results_file(all_boxes) 174 | AP = self._do_python_eval(output_dir) 175 | if self.config['cleanup']: 176 | for cls in self._classes: 177 | if cls == '__background__': 178 | continue 179 | filename = self._get_results_file_template().format(cls) 180 | os.remove(filename) 181 | return AP -------------------------------------------------------------------------------- /lib/datasets/custom_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | from math import radians 5 | 6 | 7 | def parse_rec(df, filename): 8 | """ Parse the annotations of one image from the data frame """ 9 | objects = [] 10 | objs = df[df.im_path == filename] 11 | for ix in range(len(objs)): 12 | obj_struct = {} 13 | obj_struct['class'] = objs.iloc[ix]['cls'] 14 | # clip x coordinates by width and y coordinates by height, matching custom.py 15 | x1 = max(int(objs.iloc[ix]['left']), 0) 16 | y1 = max(int(objs.iloc[ix]['upper']), 0) 17 | x2 = min(int(objs.iloc[ix]['right']), int(objs.iloc[ix]['width'] - 1)) 18 | y2 = min(int(objs.iloc[ix]['lower']), int(objs.iloc[ix]['height'] - 1)) 19 | obj_struct['bbox'] = [x1, y1, x2, y2] 20 | 21 | obj_struct['difficult'] = objs.iloc[ix]['difficult'] 22 | objects.append(obj_struct) 23 | 24 | return objects 25 | 26 | 27 | def voc_ap(rec, prec): 28 | """ 29 | Compute VOC-like AP given precision and recall. 30 | """ 31 | # correct AP calculation 32 | # first append sentinel values at the end 33 | mrec = np.concatenate(([0.], rec, [1.])) 34 | mpre = np.concatenate(([0.], prec, [0.])) 35 | 36 | # compute the precision envelope 37 | for i in range(mpre.size - 1, 0, -1): 38 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 39 | 40 | # to calculate area under PR curve, look for points 41 | # where X axis (recall) changes value 42 | i = np.where(mrec[1:] != mrec[:-1])[0] 43 | 44 | # and sum (\Delta recall) * prec 45 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 46 | return ap 47 | 48 | 49 | def custom_eval(detpath, df, subset, classname, cachedir, ovthresh=0.5): 50 | """ 51 | Top level function that does the evaluation. 52 | detpath: Path to detections; detpath.format(classname) should produce the detection results file.
53 | df: Data frame read from the annotation csv file for the subset [train / val / test] 54 | classname: Class name 55 | cachedir: Directory for caching the annotations 56 | [ovthresh]: Overlap threshold (default = 0.5) 57 | """ 58 | # assumes detections are in detpath.format(classname) 59 | # cachedir caches the annotations in a pickle file 60 | 61 | # first load gt 62 | df = df[df.set == subset] 63 | if not os.path.isdir(cachedir): 64 | os.mkdir(cachedir) 65 | cachefile = os.path.join(cachedir, '{}_annots.pkl'.format(subset)) 66 | 67 | # read list of images 68 | imagenames = np.unique(df.im_path).tolist() 69 | 70 | if not os.path.isfile(cachefile): 71 | # load annotations 72 | recs = {} 73 | for i, imagename in enumerate(imagenames): 74 | recs[imagename] = parse_rec(df, imagename) 75 | if i % 100 == 0: 76 | print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames))) 77 | # save 78 | print('Saving cached annotations to {:s}'.format(cachefile)) 79 | with open(cachefile, 'wb') as f: 80 | pickle.dump(recs, f) 81 | else: 82 | # load 83 | with open(cachefile, 'rb') as f: 84 | try: 85 | recs = pickle.load(f) 86 | except: 87 | recs = pickle.load(f, encoding='bytes') 88 | 89 | # extract gt objects for this class 90 | class_recs = {} 91 | npos = 0 92 | for imagename in imagenames: 93 | R = [obj for obj in recs[imagename] if obj['class'] == classname] 94 | bbox = np.array([x['bbox'] for x in R]) 95 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 96 | 97 | det = [False] * len(R) 98 | npos = npos + sum(~difficult) 99 | class_recs[imagename] = {'bbox': bbox, 100 | 'difficult': difficult, 101 | 'det': det} 102 | 103 | # read dets 104 | detfile = detpath.format(classname) 105 | with open(detfile, 'r') as f: 106 | lines = f.readlines() 107 | 108 | splitlines = [x.strip().split(' ') for x in lines] 109 | image_ids = [x[0] for x in splitlines] 110 | confidence = np.array([float(x[1]) for x in splitlines]) 111 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 112 | 113 | nd = len(image_ids) 114 | tp = np.zeros(nd) 115 | fp = np.zeros(nd) 116 | 117 | if BB.shape[0] > 0: 118 | # sort by descending confidence 119 | sorted_ind = np.argsort(-confidence) 120 | BB = BB[sorted_ind, :] 121 | image_ids = [image_ids[x] for x in sorted_ind] 122 | 123 | # iterate over detections 124 | for d in range(nd): 125 | R = class_recs[image_ids[d]] 126 | bb = BB[d, :].astype(float) 127 | 128 | ovmax = -np.inf 129 | BBGT = R['bbox'].astype(float) 130 | 131 | if BBGT.size > 0: 132 | # intersection 133 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 134 | iymin = np.maximum(BBGT[:, 1], bb[1]) 135 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 136 | iymax = np.minimum(BBGT[:, 3], bb[3]) 137 | iw = np.maximum(ixmax - ixmin + 1., 0.) 138 | ih = np.maximum(iymax - iymin + 1., 0.) 139 | inters = iw * ih 140 | 141 | # union 142 | uni = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 143 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters 144 | 145 | overlaps = inters / uni 146 | ovmax = np.max(overlaps) 147 | jmax = np.argmax(overlaps) 148 | 149 | # if the detection is correct 150 | if ovmax > ovthresh: 151 | if R['difficult'][jmax]: 152 | continue 153 | if not R['det'][jmax]: 154 | R['det'][jmax] = 1 155 | tp[d] = 1. 156 | else: 157 | fp[d] = 1. 158 | 159 | else: 160 | fp[d] = 1.
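    # Illustrative walk-through of the bookkeeping above and the AP computation
    # below (made-up numbers): with npos = 2 and confidence-sorted detections
    # giving tp = [1, 0, 1] and fp = [0, 1, 0], np.cumsum yields tp = [1, 1, 2]
    # and fp = [0, 1, 1], hence rec = [0.5, 0.5, 1.0] and prec = [1.0, 0.5, 0.67];
    # voc_ap then integrates the precision envelope over the recall points where
    # recall changes: 0.5 * 1.0 + 0.5 * 2/3 = 0.833.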
161 | 162 | # compute metrics AP 163 | fp = np.cumsum(fp) 164 | tp = np.cumsum(tp) 165 | rec = tp / float(npos) 166 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 167 | ap = voc_ap(rec, prec) 168 | 169 | return ap -------------------------------------------------------------------------------- /lib/datasets/custom_metadata.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import pandas as pd 4 | import cv2 5 | import collections 6 | import random 7 | import time 8 | 9 | import torch 10 | import torch.utils.data as data 11 | 12 | from model.utils.config import cfg 13 | import datasets 14 | from datasets.imdb import imdb 15 | import datasets.custom 16 | 17 | 18 | class MetaDatasetCustom(data.Dataset): 19 | def __init__(self, data_path, csv_file, img_size, image_set='train', shots=10, phase=1, shuffle=False): 20 | 21 | self.shuffle = shuffle 22 | self.img_size = img_size 23 | self.phase = phase 24 | self.shot_path = os.path.join(data_path, 'Shots.txt') 25 | self.shots = shots 26 | if phase == 2: 27 | self.shots = shots * 3 28 | 29 | self._data_path = data_path 30 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 31 | 32 | df = pd.read_csv(os.path.join(data_path, csv_file)) 33 | 34 | self.df = df[df.set == image_set] 35 | 36 | self._classes = tuple(['__background__'] + 37 | [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] + 38 | [c for c in np.unique(df.cls).tolist() if c in cfg.CUSTOM_NOVEL_CLASSES]) 39 | self.num_classes = len(self._classes) 40 | 41 | if phase == 1: 42 | self.metaclass = [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] 43 | else: 44 | self.metaclass = self._classes[1:] 45 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 46 | 47 | self._image_index = np.unique(self.df.im_path).tolist() 48 | 49 | self.prndata = [] 50 | self.prncls = [] 51 | 52 | # Used for reproduce results with the same few-shot samples 53 | prn_image_pth = os.path.join(data_path, 'prn_image_{}shots.pt'.format(shots)) 54 | prn_mask_pth = os.path.join(data_path, 'prn_mask_{}shots.pt'.format(shots)) 55 | 56 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 57 | prn_image = torch.load(prn_image_pth) 58 | prn_mask = torch.load(prn_mask_pth) 59 | else: 60 | prn_image, prn_mask = self.get_prndata() 61 | 62 | torch.save(prn_image, prn_image_pth) 63 | torch.save(prn_mask, prn_mask_pth) 64 | 65 | for i in range(shots): 66 | cls = [] 67 | data = [] 68 | for n, key in enumerate(list(prn_image.keys())): 69 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 70 | img = img.unsqueeze(0) 71 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 72 | mask = mask.unsqueeze(0) 73 | mask = mask.unsqueeze(3) 74 | imgmask = torch.cat([img, mask], dim=3) 75 | cls.append(class_to_idx[key]) 76 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 77 | self.prncls.append(cls) 78 | self.prndata.append(torch.cat(data, dim=0)) 79 | 80 | def __getitem__(self, index): 81 | return self.prndata[index], self.prncls[index] 82 | 83 | def __len__(self): 84 | return len(self.prndata) 85 | 86 | def image_id_at(self, i): 87 | """ 88 | Return the absolute path to image i in the image sequence. 89 | """ 90 | return i 91 | 92 | def image_path_at(self, i): 93 | """ 94 | Return the absolute path to image i in the image sequence. 
95 | """ 96 | return self.image_path_from_index(self._image_index[i]) 97 | 98 | def image_path_from_index(self, index): 99 | """ 100 | Construct an image path from the image's "index" identifier. 101 | """ 102 | image_path = os.path.join(self._data_path, index) 103 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 104 | return image_path 105 | 106 | def get_prndata(self): 107 | print('\ngenerating {} shot samples...'.format(self.shots)) 108 | start = time.time() 109 | 110 | if self.shuffle: 111 | random.shuffle(self._image_index) 112 | prn_image = collections.defaultdict(list) 113 | prn_mask = collections.defaultdict(list) 114 | classes = collections.defaultdict(int) 115 | valid_annot = [] 116 | 117 | for cls in self.metaclass: 118 | classes[cls] = 0 119 | 120 | for index in self._image_index: 121 | 122 | objs = self.df[self.df.im_path == index] 123 | num_objs = len(objs) 124 | 125 | img = cv2.imread(self.image_path_from_index(index), cv2.IMREAD_COLOR) 126 | 127 | # tile channels for 1-channel images 128 | if len(img.shape) == 2: 129 | img = img[:, :, np.newaxis] 130 | img = np.concatenate((img, img, img), axis=2) 131 | 132 | # drop the last channel for 4-channel images 133 | if img.shape[-1] == 4: 134 | img = img[:, :, :-1] 135 | 136 | img = img.astype(np.float32, copy=False) 137 | img -= cfg.PIXEL_MEANS 138 | 139 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 140 | h, w = img.shape[0], img.shape[1] 141 | y_ration = float(h) / self.img_size 142 | x_ration = float(w) / self.img_size 143 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 144 | 145 | for ix in range(num_objs): 146 | x1 = max(float(objs.iloc[ix]['left']), 0) 147 | y1 = max(float(objs.iloc[ix]['upper']), 0) 148 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 149 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 150 | cls = objs.iloc[ix]['cls'] 151 | 152 | if objs.iloc[ix]['difficult']: 153 | continue 154 | 155 | if cls not in self.metaclass: 156 | continue 157 | 158 | if classes[cls] >= self.shots: 159 | continue 160 | 161 | classes[cls] += 1 162 | 163 | x1 = int(x1 / x_ration) 164 | y1 = int(y1 / y_ration) 165 | x2 = int(x2 / x_ration) 166 | y2 = int(y2 / y_ration) 167 | mask[y1:y2, x1:x2] = 1 168 | 169 | prn_image[cls].append(img_resize) 170 | prn_mask[cls].append(mask) 171 | 172 | valid_annot.append(objs.iloc[[ix]]) 173 | 174 | if len(classes) > 0 and min(classes.values()) == self.shots: 175 | break 176 | 177 | end = time.time() 178 | print('few-shot samples generated in {} s\n'.format(end - start)) 179 | 180 | # save filtered csv file 181 | valid_annot = pd.concat(valid_annot) 182 | valid_annot.to_csv(self.shot_path, index=False, header=True) 183 | 184 | return prn_image, prn_mask 185 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 
| hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h >= min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | 15 | from datasets.coco import coco 16 | from datasets.pascal_voc import pascal_voc 17 | from datasets.objectnet3d import objectnet3d 18 | from datasets.custom import custom 19 | 20 | 21 | # Set up Custom Dataset (the default lambda arguments below bind the current loop values, so each registered entry keeps its own split/path/csv) 22 | for split in ['train', 'val', 'shots']: 23 | name = 'custom_{}'.format(split) 24 | data_path = 'Your_Dataset_Root_Path' 25 | csv_file = 'Custom.txt' 26 | if split == 'shots': 27 | split = 'train' 28 | csv_file = 'Shots.txt' 29 | __sets[name] = (lambda split=split, data_path=data_path, csv_file=csv_file: custom(split, data_path, csv_file)) 30 | 31 | 32 | # Set up objectnet3d_<split> 33 | for split in ['train', 'val', 'test', 'shots']: 34 | name = 'objectnet3d_{}'.format(split) 35 | data_path = '/home/xiao/Datasets/ObjectNet3D' 36 | csv_file = 'ObjectNet3D_new.txt' 37 | if split == 'shots': 38 | split = 'train' 39 | csv_file = 'Shots.txt' 40 | __sets[name] = (lambda split=split, data_path=data_path, csv_file=csv_file: objectnet3d(split, data_path, csv_file)) 41 | 42 | 43 | # Set up voc_<year>_<split> 44 | for year in ['2007', '2012']: 45 | for split in ['train', 'val', 'trainval', 'test', 'shots', 46 | 'train_first_split', 'train_second_split', 'train_third_split']: 47 | name = 'voc_{}_{}'.format(year, split) 48 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 49 | 50 | for year in ['2014']: 51 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval', 'shots', 52 | 'TFA1shot', 'TFA2shot', 'TFA3shot', 'TFA5shot', 'TFA10shot', 'TFA30shot']: 53 | name = 'coco_{}_{}'.format(year, split) 54 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 55 | 56 | for year in ['2017']: 57 | for split in ['train', 'val']: 58 | name = 'coco_{}_{}'.format(year, split) 59 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 60 | 61 | 62 | def
get_imdb(name): 63 | """Get an imdb (image database) by name.""" 64 | if name not in __sets: 65 | raise KeyError('Unknown dataset: {}'.format(name)) 66 | return __sets[name]() 67 | 68 | 69 | def list_imdbs(): 70 | """List all registered imdbs.""" 71 | return list(__sets.keys()) 72 | -------------------------------------------------------------------------------- /lib/datasets/metadata.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pytorch Meta R-CNN 3 | # Written by Anny Xu, Xiaopeng Yan, based on the code from Jianwei Yang 4 | # -------------------------------------------------------- 5 | import os 6 | import os.path 7 | import sys 8 | import torch.utils.data as data 9 | import cv2 10 | import torch 11 | import random 12 | import numpy as np 13 | if sys.version_info[0] == 2: 14 | import xml.etree.cElementTree as ET 15 | else: 16 | import xml.etree.ElementTree as ET 17 | from model.utils.config import cfg 18 | import collections 19 | 20 | 21 | class MetaDataset(data.Dataset): 22 | 23 | """Meta Dataset 24 | Arguments: 25 | root (string): filepath to VOCdevkit folder. 26 | image_set (string): imageset to use (eg. 'train', 'val') 27 | metaclass(string): the class name 28 | img_size(int) : the PRN network input size 29 | shot(int): the number of instances 30 | shuffle(bool) 31 | """ 32 | 33 | def __init__(self, root, image_sets, metaclass, img_size, shots=1, shuffle=False, phase=1): 34 | self.root = root 35 | self.image_set = image_sets 36 | self.img_size = img_size 37 | self.metaclass = metaclass 38 | self.shots = shots 39 | if phase == 2: 40 | self.shots = shots * 3 41 | self.shuffle = shuffle 42 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 43 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 44 | self.shot_path = open(os.path.join(self.root, 'VOC2007', 'ImageSets/Main/shots.txt'), 'w') 45 | self.ids = list() 46 | for (year, name) in image_sets: 47 | self._year = year 48 | rootpath = os.path.join(self.root, 'VOC' + year) 49 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 50 | self.ids.append((rootpath, line.strip())) 51 | 52 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 53 | 54 | self.prndata = [] 55 | self.prncls = [] 56 | prn_image, prn_mask = self.get_prndata() 57 | for i in range(shots): 58 | cls = [] 59 | data = [] 60 | for n, key in enumerate(list(prn_image.keys())): 61 | img = torch.from_numpy(np.array(prn_image[key][i])) 62 | img = img.unsqueeze(0) 63 | mask = torch.from_numpy(np.array(prn_mask[key][i])) 64 | mask = mask.unsqueeze(0) 65 | mask = mask.unsqueeze(3) 66 | imgmask = torch.cat([img, mask], dim=3) 67 | cls.append(class_to_idx[key]) 68 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 69 | self.prncls.append(cls) 70 | self.prndata.append(torch.cat(data, dim=0)) 71 | 72 | def __getitem__(self, index): 73 | return self.prndata[index], self.prncls[index] 74 | 75 | def get_prndata(self): 76 | ''' 77 | :return: the construct prn input data 78 | :prn_image: lists of images in shape of (H, W, 3) 79 | :prn_mask: lists of masks in shape pf (H, W) 80 | ''' 81 | if self.shuffle: 82 | random.shuffle(self.ids) 83 | prn_image = collections.defaultdict(list) 84 | prn_mask = collections.defaultdict(list) 85 | classes = collections.defaultdict(int) 86 | for cls in self.metaclass: 87 | classes[cls] = 0 88 | for img_id in self.ids: 89 | target = ET.parse(self._annopath % 
img_id).getroot() 90 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 91 | img = img.astype(np.float32, copy=False) 92 | img -= cfg.PIXEL_MEANS 93 | height, width, _ = img.shape 94 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 95 | h, w, _ = img.shape 96 | y_ration = float(h) / self.img_size 97 | x_ration = float(w) / self.img_size 98 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 99 | for obj in target.iter('object'): 100 | difficult = int(obj.find('difficult').text) == 1 101 | if difficult: 102 | continue 103 | name = obj.find('name').text.strip() 104 | if name not in self.metaclass: 105 | continue 106 | if classes[name] >= self.shots: 107 | break 108 | classes[name] += 1 109 | bbox = obj.find('bndbox') 110 | pts = ['xmin', 'ymin', 'xmax', 'ymax'] 111 | bndbox = [] 112 | for i, pt in enumerate(pts): 113 | cur_pt = int(float(bbox.find(pt).text)) - 1 114 | if i % 2 == 0: 115 | cur_pt = int(cur_pt / x_ration) 116 | bndbox.append(cur_pt) 117 | elif i % 2 == 1: 118 | cur_pt = int(cur_pt / y_ration) 119 | bndbox.append(cur_pt) 120 | mask[bndbox[1]:bndbox[3], bndbox[0]:bndbox[2]] = 1 121 | prn_image[name].append(img_resize) 122 | prn_mask[name].append(mask) 123 | self.shot_path.write(str(img_id[1])+'\n') 124 | break 125 | if len(classes) > 0 and min(classes.values()) == self.shots: 126 | break 127 | self.shot_path.close() 128 | return prn_image, prn_mask 129 | 130 | def __len__(self): 131 | return len(self.prndata) 132 | -------------------------------------------------------------------------------- /lib/datasets/metadata_3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.objectnet3d 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pickle 19 | from model.utils.config import cfg 20 | import pandas as pd 21 | 22 | import torch.utils.data as data 23 | import cv2 24 | import torch 25 | import random 26 | import collections 27 | import time 28 | 29 | 30 | class MetaDataset3D(data.Dataset): 31 | def __init__(self, data_path, csv_file, img_size, image_set='train', shots=1, phase=1, shuffle=False): 32 | 33 | self.shuffle = shuffle 34 | self.img_size = img_size 35 | self.phase = phase 36 | self.shot_path = os.path.join(data_path, 'Shots.txt') 37 | self.shots = shots 38 | if phase == 2: 39 | self.shots = shots * 3 40 | 41 | self._data_path = data_path 42 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 43 | 44 | df = pd.read_csv(os.path.join(data_path, csv_file)) 45 | 46 | if image_set != 'train': 47 | df = df[df.occluded == 0] 48 | df = df[df.truncated == 0] 49 | self.df = df[df.set == image_set] 50 | 51 | self._classes = tuple(['__background__'] + 52 | [c for c in np.unique(df.cat).tolist() if c not in cfg.NOVEL_3D_CLASSES] + 53 | [c for c in np.unique(df.cat).tolist() if c in cfg.NOVEL_3D_CLASSES]) 54 | self.num_classes = len(self._classes) 55 | 56 | if phase == 1: 57 | self.metaclass = [c for c in np.unique(df.cat).tolist() if c not 
in cfg.NOVEL_3D_CLASSES] 58 | else: 59 | self.metaclass = self._classes[1:] 60 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 61 | 62 | self._image_index = np.unique(self.df.im_path).tolist() 63 | 64 | self.prndata = [] 65 | self.prncls = [] 66 | 67 | prn_image_pth = os.path.join(data_path, 'prn_image_{}shots.pt'.format(shots)) 68 | prn_mask_pth = os.path.join(data_path, 'prn_mask_{}shots.pt'.format(shots)) 69 | 70 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 71 | prn_image = torch.load(prn_image_pth) 72 | prn_mask = torch.load(prn_mask_pth) 73 | else: 74 | prn_image, prn_mask = self.get_prndata() 75 | 76 | torch.save(prn_image, prn_image_pth) 77 | torch.save(prn_mask, prn_mask_pth) 78 | 79 | for i in range(shots): 80 | cls = [] 81 | data = [] 82 | for n, key in enumerate(list(prn_image.keys())): 83 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 84 | img = img.unsqueeze(0) 85 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 86 | mask = mask.unsqueeze(0) 87 | mask = mask.unsqueeze(3) 88 | imgmask = torch.cat([img, mask], dim=3) 89 | cls.append(class_to_idx[key]) 90 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 91 | self.prncls.append(cls) 92 | self.prndata.append(torch.cat(data, dim=0)) 93 | 94 | def __getitem__(self, index): 95 | return self.prndata[index], self.prncls[index] 96 | 97 | def __len__(self): 98 | return len(self.prndata) 99 | 100 | def image_id_at(self, i): 101 | """ 102 | Return the absolute path to image i in the image sequence. 103 | """ 104 | return i 105 | 106 | def image_path_at(self, i): 107 | """ 108 | Return the absolute path to image i in the image sequence. 109 | """ 110 | return self.image_path_from_index(self._image_index[i]) 111 | 112 | def image_path_from_index(self, index): 113 | """ 114 | Construct an image path from the image's "index" identifier. 
115 | """ 116 | image_path = os.path.join(self._data_path, index) 117 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 118 | return image_path 119 | 120 | def get_prndata(self): 121 | print('\ngenerating {} shot samples...'.format(self.shots)) 122 | start = time.time() 123 | 124 | if self.shuffle: 125 | random.shuffle(self._image_index) 126 | prn_image = collections.defaultdict(list) 127 | prn_mask = collections.defaultdict(list) 128 | classes = collections.defaultdict(int) 129 | valid_annot = [] 130 | 131 | for cls in self.metaclass: 132 | classes[cls] = 0 133 | 134 | for index in self._image_index: 135 | 136 | objs = self.df[self.df.im_path == index] 137 | num_objs = len(objs) 138 | 139 | img = cv2.imread(self.image_path_from_index(index), cv2.IMREAD_COLOR) 140 | # tile channels for 1-channel images 141 | if len(img.shape) == 2: 142 | img = img[:, :, np.newaxis] 143 | img = np.concatenate((img, img, img), axis=2) 144 | 145 | # drop the last channel for 4-channel images 146 | if img.shape[-1] == 4: 147 | img = img[:, :, :-1] 148 | 149 | img = img.astype(np.float32, copy=False) 150 | img -= cfg.PIXEL_MEANS 151 | 152 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 153 | h, w = img.shape[0], img.shape[1] 154 | y_ration = float(h) / self.img_size 155 | x_ration = float(w) / self.img_size 156 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 157 | 158 | for ix in range(num_objs): 159 | x1 = max(float(objs.iloc[ix]['left']), 0) 160 | y1 = max(float(objs.iloc[ix]['upper']), 0) 161 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 162 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 163 | cls = objs.iloc[ix]['cat'] 164 | 165 | if objs.iloc[ix]['difficult'] or objs.iloc[ix]['occluded'] or objs.iloc[ix]['truncated']: 166 | continue 167 | 168 | if cls not in self.metaclass: 169 | continue 170 | 171 | if classes[cls] >= self.shots: 172 | break 173 | 174 | classes[cls] += 1 175 | 176 | x1 = int(x1 / x_ration) 177 | y1 = int(y1 / y_ration) 178 | x2 = int(x2 / x_ration) 179 | y2 = int(y2 / y_ration) 180 | mask[y1:y2, x1:x2] = 1 181 | 182 | prn_image[cls].append(img_resize) 183 | prn_mask[cls].append(mask) 184 | 185 | valid_annot.append(objs.iloc[[ix]]) 186 | break 187 | 188 | if len(classes) > 0 and min(classes.values()) == self.shots: 189 | break 190 | 191 | end = time.time() 192 | print('few-shot samples generated in {} s\n'.format(end - start)) 193 | 194 | # save filtered csv file 195 | valid_annot = pd.concat(valid_annot) 196 | valid_annot.to_csv(self.shot_path, index=False, header=True) 197 | 198 | return prn_image, prn_mask 199 | -------------------------------------------------------------------------------- /lib/datasets/metadata_TFA.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import cv2 3 | import torch 4 | import collections 5 | import time 6 | import os 7 | import numpy as np 8 | import json 9 | import os.path as osp 10 | 11 | from model.utils.config import cfg 12 | from pycocotools.coco import COCO 13 | 14 | 15 | class MetaDatasetTFA(data.Dataset): 16 | def __init__(self, root, image_set, year, img_size, shots=10): 17 | self.img_size = img_size 18 | self.TFA_split = os.path.join(root, 'annotations', 'TFA', 'cocosplit') 19 | self.shot_path = os.path.join(root, 'annotations', 'instances_TFA{}shot2014.json'.format(shots)) 20 | self.shots = shots 21 | 22 | # name, paths 23 | 
self._year = year 24 | self._image_set = image_set 25 | self._data_path = root 26 | 27 | # load COCO API, classes, class <-> id mappings 28 | self._COCO = COCO(self._get_ann_file()) 29 | self.json_data = self._COCO.dataset.copy() 30 | cats = self._COCO.loadCats(self._COCO.getCatIds()) 31 | 32 | self._classes = tuple(['__background__'] + 33 | [c['name'] for c in cats if c['name'] not in cfg.VOC_CLASSES] + 34 | [c['name'] for c in cats if c['name'] in cfg.VOC_CLASSES]) 35 | 36 | self._class_to_coco_cat_id = dict(list(zip([c['name'] for c in cats], self._COCO.getCatIds()))) 37 | 38 | self.metaclass = tuple([c['name'] for c in cats if c['name'] not in cfg.VOC_CLASSES] + 39 | [c['name'] for c in cats if c['name'] in cfg.VOC_CLASSES]) 40 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 41 | 42 | self.prndata = [] 43 | self.prncls = [] 44 | 45 | prn_image_pth = os.path.join(root, 'annotations', 'TFA', 'prn_image_{}shots.pt'.format(shots)) 46 | prn_mask_pth = os.path.join(root, 'annotations', 'TFA', 'prn_mask_{}shots.pt'.format(shots)) 47 | 48 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 49 | prn_image = torch.load(prn_image_pth) 50 | prn_mask = torch.load(prn_mask_pth) 51 | else: 52 | prn_image, prn_mask = self.get_prndata() 53 | 54 | torch.save(prn_image, prn_image_pth) 55 | torch.save(prn_mask, prn_mask_pth) 56 | 57 | for i in range(shots): 58 | cls = [] 59 | data = [] 60 | for n, key in enumerate(list(prn_image.keys())): 61 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 62 | img = img.unsqueeze(0) 63 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 64 | mask = mask.unsqueeze(0) 65 | mask = mask.unsqueeze(3) 66 | imgmask = torch.cat([img, mask], dim=3) 67 | cls.append(class_to_idx[key]) 68 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 69 | self.prncls.append(cls) 70 | self.prndata.append(torch.cat(data, dim=0)) 71 | 72 | def __getitem__(self, index): 73 | return self.prndata[index], self.prncls[index] 74 | 75 | def __len__(self): 76 | return len(self.prndata) 77 | 78 | def _get_ann_file(self): 79 | prefix = 'instances' if self._image_set.find('test') == -1 else 'image_info' 80 | return osp.join(self._data_path, 'annotations', prefix + '_' + self._image_set + self._year + '.json') 81 | 82 | def get_prndata(self): 83 | print('\ngenerating {} shot samples...'.format(self.shots)) 84 | start = time.time() 85 | 86 | prn_image = collections.defaultdict(list) 87 | prn_mask = collections.defaultdict(list) 88 | classes = collections.defaultdict(int) 89 | 90 | sample_images = [] 91 | sample_annots = [] 92 | 93 | for cls in self.metaclass: 94 | cls_json_file = 'full_box_{}shot_{}_trainval.json'.format(self.shots, cls) 95 | samples = json.load(open(osp.join(self.TFA_split, cls_json_file), 'r')) 96 | 97 | sample_annots.extend(samples['annotations']) 98 | sample_images.extend(samples['images']) 99 | 100 | for d_annot in samples['annotations']: 101 | img_id = d_annot['image_id'] 102 | 103 | for d_img in samples['images']: 104 | if d_img['id'] == img_id: 105 | img_info = d_img 106 | break 107 | 108 | width = img_info['width'] 109 | height = img_info['height'] 110 | 111 | x1 = np.max((0, d_annot['bbox'][0])) 112 | y1 = np.max((0, d_annot['bbox'][1])) 113 | x2 = np.min((width - 1, x1 + np.max((0, d_annot['bbox'][2] - 1)))) 114 | y2 = np.min((height - 1, y1 + np.max((0, d_annot['bbox'][3] - 1)))) 115 | 116 | if d_annot['area'] > 0 and x2 >= x1 and y2 >= y1: 117 | set_name = 
img_info['file_name'].split('_')[1] 118 | img_path = osp.join(self._data_path, 'images', set_name, img_info['file_name']) 119 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) 120 | img = img.astype(np.float32, copy=False) 121 | img -= cfg.PIXEL_MEANS 122 | 123 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 124 | h, w, _ = img.shape 125 | y_ratio = float(h) / self.img_size 126 | x_ratio = float(w) / self.img_size 127 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 128 | 129 | classes[cls] += 1 130 | x1_r = int(x1 / x_ratio) 131 | y1_r = int(y1 / y_ratio) 132 | x2_r = int(x2 / x_ratio) 133 | y2_r = int(y2 / y_ratio) 134 | 135 | mask[y1_r:y2_r, x1_r:x2_r] = 1 136 | 137 | prn_image[cls].append(img_resize) 138 | prn_mask[cls].append(mask) 139 | 140 | end = time.time() 141 | print('few-shot samples generated in {} s\n'.format(end - start)) 142 | print('minimum sample number among all classes is {}'.format(min(classes.values()))) 143 | 144 | self.json_data['images'] = sample_images 145 | self.json_data['annotations'] = sample_annots 146 | 147 | def convert(o): 148 | if isinstance(o, np.int64): return int(o) 149 | raise TypeError 150 | 151 | with open(self.shot_path, 'w') as f: 152 | json.dump(self.json_data, f, default=convert) 153 | 154 | return prn_image, prn_mask 155 |
-------------------------------------------------------------------------------- /lib/datasets/objectnet3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.objectnet3d 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pickle 19 | import pandas as pd 20 | from model.utils.config import cfg 21 | from .pascal3d_eval import pascal3d_eval 22 | 23 | try: 24 | xrange # Python 2 25 | except NameError: 26 | xrange = range # Python 3 27 | 28 | 29 | class objectnet3d(imdb): 30 | def __init__(self, image_set, data_path, csv_file): 31 | imdb.__init__(self, 'objectnet3d_{}'.format(image_set)) 32 | self._image_set = image_set 33 | self._data_path = data_path 34 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 35 | 36 | df = pd.read_csv(os.path.join(data_path, csv_file)) 37 | 38 | self.df = df[df.set == 'train'] if image_set == 'shots' else df[df.set == image_set] 39 | 40 | self._classes = tuple(['__background__'] + 41 | [c for c in np.unique(df.cat).tolist() if c not in cfg.NOVEL_3D_CLASSES] + 42 | [c for c in np.unique(df.cat).tolist() if c in cfg.NOVEL_3D_CLASSES]) 43 | print('Dataset classes: {}'.format(self._classes)) 44 | 45 | self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) 46 | 47 | self._image_index = np.unique(self.df.im_path).tolist() 48 | 49 | # Default to roidb handler 50 | self._roidb_handler = self.gt_roidb 51 | self._comp_id = 'comp4' 52 | 53 | # Specific config options 54 | self.config = {'cleanup': False, 55 | 'use_salt': True, 56 | 'use_diff': False, 57 | 'matlab_eval': False} 58 | 59 | def image_id_at(self, i): 60 | """ 61 | Return the id of image i in the image sequence. 62 | """ 63 | return i 64 | 65 | def image_path_at(self, i): 66 | """ 67 | Return the absolute path to image i in the image sequence. 68 | """ 69 | return self.image_path_from_index(self._image_index[i]) 70 | 71 | def image_path_from_index(self, index): 72 | """ 73 | Construct an image path from the image's "index" identifier. 74 | """ 75 | image_path = os.path.join(self._data_path, index) 76 | assert os.path.exists(image_path), 'Path does not exist: {}'.format(image_path) 77 | return image_path 78 | 79 | def gt_roidb(self): 80 | """ 81 | Return the database of ground-truth regions of interest. 82 | This function loads/saves from/to a cache file to speed up future calls. 83 | """ 84 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 85 | if os.path.exists(cache_file): 86 | with open(cache_file, 'rb') as fid: 87 | roidb = pickle.load(fid) 88 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 89 | return roidb 90 | 91 | gt_roidb = [self._load_pascal3d_annotation(index) for index in self.image_index] 92 | with open(cache_file, 'wb') as fid: 93 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 94 | print('wrote gt roidb to {}'.format(cache_file)) 95 | 96 | return gt_roidb 97 | 98 | def _load_pascal3d_annotation(self, index): 99 | """ 100 | Load image and bounding boxes info from txt files of pascal3d. 101 | """ 102 | 103 | objs = self.df[self.df.im_path == index] 104 | num_objs = len(objs) 105 | 106 | # original annotation for object detection 107 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 108 | gt_classes = np.zeros(num_objs, dtype=np.int32) 109 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 110 | 111 | # "Seg" area for pascal is just the box area 112 | seg_areas = np.zeros((num_objs), dtype=np.float32) 113 | ishards = np.zeros((num_objs), dtype=np.int32) 114 | 115 | # Load object annotation into a data frame.
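# The loop below assumes each row of the annotation CSV provides at least these
# columns (a schema sketch inferred from the accesses in this file, not an official spec):
#   im_path, set, cat, left, upper, right, lower, width, height, difficult
# e.g. a purely hypothetical row:
#   Images/aeroplane_imagenet/n02690373_16.JPEG, train, aeroplane, 60, 78, 420, 255, 480, 360, 0
# x-coordinates are clamped against the image width, y-coordinates against the height.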
116 | for ix in range(num_objs): 117 | x1 = max(float(objs.iloc[ix]['left']), 0) 118 | y1 = max(float(objs.iloc[ix]['upper']), 0) 119 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 120 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 121 | cat = objs.iloc[ix]['cat'] 122 | if cat not in self._class_to_ind: 123 | continue # skip annotations whose category is not in the class list 124 | cls = self._class_to_ind[cat] 125 | boxes[ix, :] = [x1, y1, x2, y2] 126 | gt_classes[ix] = cls 127 | overlaps[ix, cls] = 1.0 128 | ishards[ix] = objs.iloc[ix]['difficult'] 129 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 130 | 131 | overlaps = scipy.sparse.csr_matrix(overlaps) 132 | 133 | return {'boxes': boxes, 134 | 'gt_classes': gt_classes, 135 | 'gt_ishard': ishards, 136 | 'gt_overlaps': overlaps, 137 | 'flipped': False, 138 | 'seg_areas': seg_areas} 139 | 140 | def _get_results_file_template(self): 141 | # data_path/results/_det_test_aeroplane.txt 142 | filename = self._comp_id + '_det_' + self._image_set + '_{:s}.txt' 143 | filedir = os.path.join(self._data_path, 'results') 144 | if not os.path.exists(filedir): 145 | os.makedirs(filedir) 146 | path = os.path.join(filedir, filename) 147 | return path 148 | 149 | def _write_results_file(self, all_boxes): 150 | for cls_ind, cls in enumerate(self.classes): 151 | if cls == '__background__': 152 | continue 153 | print('Writing {} ObjectNet3D results file'.format(cls)) 154 | filename = self._get_results_file_template().format(cls) 155 | with open(filename, 'wt') as f: 156 | for im_ind, index in enumerate(self.image_index): 157 | dets = all_boxes[cls_ind][im_ind] 158 | if len(dets) == 0: 159 | continue 160 | # the VOCdevkit expects 1-based indices 161 | for k in xrange(dets.shape[0]): 162 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 163 | format(index, dets[k, -1], 164 | dets[k, 0] + 1, dets[k, 1] + 1, 165 | dets[k, 2] + 1, dets[k, 3] + 1)) 166 | 167 | def _do_python_eval(self, output_dir='output'): 168 | cachedir = os.path.join(self._data_path, 'annotations_cache') 169 | if not os.path.isdir(output_dir): 170 | os.mkdir(output_dir) 171 | 172 | aps = [] 173 | for i, cls in enumerate(self._classes): 174 | if cls == '__background__': 175 | continue 176 | filename = self._get_results_file_template().format(cls) 177 | ap = pascal3d_eval(filename, self.df, self._image_set, cls, cachedir, ovthresh=0.5) 178 | print('AP for {} = {:.3f}'.format(cls, ap)) 179 | 180 | aps.append(ap) 181 | 182 | if i == 80: # the first 80 classes (after __background__) are the base classes 183 | print('Mean AP = {:.4f} for base'.format(np.mean(aps))) 184 | if i == 100: # the last 20 classes are the novel classes 185 | print('Mean AP = {:.4f} for novel'.format(np.mean(aps[-20:]))) 186 | 187 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 188 | return np.mean(aps) 189 | 190 | def evaluate_detections(self, all_boxes, output_dir, **kwargs): 191 | self._write_results_file(all_boxes) 192 | AP = self._do_python_eval(output_dir) 193 | if self.config['cleanup']: 194 | for cls in self._classes: 195 | if cls == '__background__': 196 | continue 197 | filename = self._get_results_file_template().format(cls) 198 | os.remove(filename) 199 | return AP 200 |
-------------------------------------------------------------------------------- /lib/datasets/pascal3d_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 |
from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import pickle 12 | import numpy as np 13 | from math import radians 14 | 15 | 16 | def parse_rec(df, filename): 17 | """ Parse PASCAL 3D annotation file """ 18 | objects = [] 19 | objs = df[df.im_path == filename] 20 | for ix in range(len(objs)): 21 | obj_struct = {} 22 | obj_struct['class'] = objs.iloc[ix]['cat'] 23 | 24 | x1 = max(int(objs.iloc[ix]['left']), 0) 25 | y1 = max(int(objs.iloc[ix]['upper']), 0) 26 | x2 = min(int(objs.iloc[ix]['right']), int(objs.iloc[ix]['width'] - 1))  # clamp x to the image width 27 | y2 = min(int(objs.iloc[ix]['lower']), int(objs.iloc[ix]['height'] - 1))  # clamp y to the image height 28 | 29 | obj_struct['bbox'] = [x1, y1, x2, y2] 30 | 31 | obj_struct['difficult'] = objs.iloc[ix]['difficult'] 32 | obj_struct['truncated'] = objs.iloc[ix]['truncated'] 33 | obj_struct['occluded'] = objs.iloc[ix]['occluded'] 34 | objects.append(obj_struct) 35 | 36 | return objects 37 | 38 | 39 | def voc_ap(rec, prec): 40 | """ 41 | Compute VOC AP given precision and recall. 42 | """ 43 | # correct AP calculation 44 | # first append sentinel values at the end 45 | mrec = np.concatenate(([0.], rec, [1.])) 46 | mpre = np.concatenate(([0.], prec, [0.])) 47 | 48 | # compute the precision envelope 49 | for i in range(mpre.size - 1, 0, -1): 50 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 51 | 52 | # to calculate area under PR curve, look for points 53 | # where X axis (recall) changes value 54 | i = np.where(mrec[1:] != mrec[:-1])[0] 55 | 56 | # and sum (\Delta recall) * prec 57 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 58 | return ap 59 | 60 | 61 | def angles_to_matrix(angles): 62 | """Compute the rotation matrix from Euler angles in degrees""" 63 | azi = radians(angles[0]) 64 | ele = radians(angles[1]) 65 | inp = radians(angles[2]) 66 | element1 = np.cos(inp) * np.cos(azi) - np.sin(inp) * np.cos(ele) * np.sin(azi) 67 | element2 = np.sin(inp) * np.cos(azi) + np.cos(inp) * np.cos(ele) * np.sin(azi) 68 | element3 = np.sin(ele) * np.sin(azi) 69 | element4 = -np.cos(inp) * np.sin(azi) - np.sin(inp) * np.cos(ele) * np.cos(azi) 70 | element5 = -np.sin(inp) * np.sin(azi) + np.cos(inp) * np.cos(ele) * np.cos(azi) 71 | element6 = np.sin(ele) * np.cos(azi) 72 | element7 = np.sin(inp) * np.sin(ele) 73 | element8 = -np.cos(inp) * np.sin(ele) 74 | element9 = np.cos(ele) 75 | R_mat = np.array((element1, element2, element3, 76 | element4, element5, element6, 77 | element7, element8, element9)).reshape(3, 3) 78 | return R_mat 79 | 80 | 81 | def azimuth_match_interval(azi_pred, azi_gt, view=24): 82 | offset = (360 / view) / 2 83 | step = 360 / view 84 | interval_pred = int((azi_pred + offset) % 360 // step) 85 | interval_gt = int((azi_gt + offset) % 360 // step) 86 | return interval_pred == interval_gt 87 | 88 | 89 | def viewpoint_err(vp_pred, vp_gt): 90 | """ Compute the rotation matrix error between viewpoints """ 91 | R_pred = angles_to_matrix(vp_pred) 92 | R_gt = angles_to_matrix(vp_gt) 93 | R_err = np.arccos(((np.sum(R_pred * R_gt)).clip(-1., 3.) - 1.) / 2) 94 | return R_err 95 | 96 | 97 | def pascal3d_eval(detpath, df, subset, classname, cachedir, ovthresh=0.5): 98 | """ 99 | Top level function that does the PASCAL VOC evaluation. 100 | detpath: Path to detections 101 | detpath.format(classname) should produce the detection results file.
102 | df: Data frame read from the annotation csv file for the subset [train / val / test] 103 | classname: Category name (duh) 104 | cachedir: Directory for caching the annotations 105 | [ovthresh]: Overlap threshold (default = 0.5) 106 | """ 107 | # assumes detections are in detpath.format(classname) 108 | # cachedir caches the annotations in a pickle file 109 | 110 | # first load gt 111 | df = df[df.set == subset] 112 | if not os.path.isdir(cachedir): 113 | os.mkdir(cachedir) 114 | cachefile = os.path.join(cachedir, '{}_annots.pkl'.format(subset)) 115 | 116 | # read list of images 117 | imagenames = np.unique(df.im_path).tolist() 118 | 119 | if not os.path.isfile(cachefile): 120 | # load annotations 121 | recs = {} 122 | for i, imagename in enumerate(imagenames): 123 | recs[imagename] = parse_rec(df, imagename) 124 | if i % 100 == 0: 125 | print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames))) 126 | # save 127 | print('Saving cached annotations to {:s}'.format(cachefile)) 128 | with open(cachefile, 'wb') as f: 129 | pickle.dump(recs, f) 130 | else: 131 | # load 132 | with open(cachefile, 'rb') as f: 133 | try: 134 | recs = pickle.load(f) 135 | except: 136 | recs = pickle.load(f, encoding='bytes') 137 | 138 | # extract gt objects for this class 139 | class_recs = {} 140 | npos = 0 141 | for imagename in imagenames: 142 | R = [obj for obj in recs[imagename] if obj['class'] == classname] 143 | bbox = np.array([x['bbox'] for x in R]) 144 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 145 | 146 | det = [False] * len(R) 147 | npos = npos + sum(~difficult) 148 | class_recs[imagename] = {'bbox': bbox, 149 | 'difficult': difficult, 150 | 'det': det} 151 | 152 | # read dets 153 | detfile = detpath.format(classname) 154 | with open(detfile, 'r') as f: 155 | lines = f.readlines() 156 | 157 | splitlines = [x.strip().split(' ') for x in lines] 158 | image_ids = [x[0] for x in splitlines] 159 | confidence = np.array([float(x[1]) for x in splitlines]) 160 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 161 | 162 | nd = len(image_ids) 163 | tp = np.zeros(nd) 164 | fp = np.zeros(nd) 165 | 166 | if BB.shape[0] > 0: 167 | # sort by descending confidence 168 | sorted_ind = np.argsort(-confidence) 169 | BB = BB[sorted_ind, :] 170 | image_ids = [image_ids[x] for x in sorted_ind] 171 | 172 | # iterate for each detection 173 | for d in range(nd): 174 | R = class_recs[image_ids[d]] 175 | bb = BB[d, :].astype(float) 176 | 177 | ovmax = -np.inf 178 | BBGT = R['bbox'].astype(float) 179 | 180 | if BBGT.size > 0: 181 | # intersection 182 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 183 | iymin = np.maximum(BBGT[:, 1], bb[1]) 184 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 185 | iymax = np.minimum(BBGT[:, 3], bb[3]) 186 | iw = np.maximum(ixmax - ixmin + 1., 0.) 187 | ih = np.maximum(iymax - iymin + 1., 0.) 188 | inters = iw * ih 189 | 190 | # union 191 | uni = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 192 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters 193 | 194 | overlaps = inters / uni 195 | ovmax = np.max(overlaps) 196 | jmax = np.argmax(overlaps) 197 | 198 | # if the detection is correct 199 | if ovmax > ovthresh: 200 | if R['difficult'][jmax]: 201 | continue 202 | if not R['det'][jmax]: 203 | R['det'][jmax] = 1 204 | tp[d] = 1. 205 | else: 206 | fp[d] = 1. 207 | 208 | else: 209 | fp[d] = 1.
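# A small worked example of the precision/recall computation below, assuming npos = 2
# and three detections already sorted by descending confidence:
#   per-detection flags: tp = [1, 0, 1], fp = [0, 1, 0]
#   after cumsum:        tp = [1, 1, 2], fp = [0, 1, 1]
#   rec  = tp / npos      = [0.5, 0.5, 1.0]
#   prec = tp / (tp + fp) = [1.0, 0.5, 0.667]
# voc_ap then integrates the precision envelope over recall to obtain AP.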
210 | 211 | # compute precision/recall and AP 212 | fp = np.cumsum(fp) 213 | tp = np.cumsum(tp) 214 | rec = tp / float(npos) 215 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 216 | ap = voc_ap(rec, prec) 217 | 218 | return ap
-------------------------------------------------------------------------------- /lib/datasets/tools/compute_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import os 4 | import sys 5 | 6 | NUM_ATTR_REL = 200 7 | def cout_w(prob, num=NUM_ATTR_REL, dim=1): 8 | prob_weight = prob[:, :num] 9 | sum_value = np.sum(prob_weight, keepdims=True, axis=dim) + 0.1 10 | prob_weight = prob_weight / np.repeat(sum_value, prob_weight.shape[dim], axis=dim) 11 | return prob_weight 12 | 13 | def cp_kl(a, b): 14 | # compute the KL divergence KL(a || b) 15 | if np.sum(a) == 0 or np.sum(b) == 0: 16 | return 1 17 | sum_ = a * np.log(a / b) 18 | # keep only the finite terms (zero-probability bins produce nan/inf) 19 | kl = np.sum(sum_[np.isfinite(sum_)]) 20 | return kl 21 | 22 | def compute_js(attr_prob): 23 | cls_num = attr_prob.shape[0] 24 | similarity = np.zeros((cls_num, cls_num)) 25 | similarity[0, 1:] = 1 26 | similarity[1:, 0] = 1 27 | for i in range(1, cls_num): 28 | if i % 50 == 0: 29 | print('processed {} classes...\n'.format(i)) 30 | for j in range(1, cls_num): 31 | if i == j: 32 | similarity[i, j] = 0 33 | else: 34 | similarity[i, j] = 0.5 * (cp_kl(attr_prob[i, :], 0.5 * (attr_prob[i, :] + attr_prob[j, :])) 35 | + cp_kl(attr_prob[j, :], 0.5 * (attr_prob[i, :] + attr_prob[j, :]))) 36 | return similarity 37 | 38 | if __name__ == '__main__': 39 | data_path = '/data/VisualGenome/graph/' 40 | dim_ = 1000 41 | ## Compute attribute knowledge by JS-divergence 42 | graph_a = pickle.load(open(data_path + 'vg_attr_frequency_1000.pkl', 'rb')) 43 | 44 | ## You can take a subset of graph_a and match its names with your dataset 45 | # We give an example of computing the graph of VisualGenome with 1000 classes 46 | # the first row of graph_a is the background class 47 | graph_a = cout_w(graph_a, num=len(graph_a)) 48 | graph_a = compute_js(graph_a) 49 | graph_a = 1 - graph_a 50 | pickle.dump(graph_a, open(data_path + 'vg_graph_a.pkl', 'wb')) 51 | 52 | ## Compute relation knowledge 53 | graph_r = pickle.load(open(data_path + 'vg_pair_frequency_1000.pkl', 'rb')) 54 | ## You can take a subset of graph_r and match its names with your dataset 55 | # We give an example of computing the graph of VisualGenome with 1000 classes 56 | relation_matrix = np.zeros((dim_, dim_)) 57 | relation_matrix = graph_r + graph_r.transpose() 58 | relation_matrix_row_sum = relation_matrix.sum(1) 59 | for i in range(dim_): 60 | relation_matrix[i, i] = relation_matrix_row_sum[i] + 1.
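# The loop below turns the symmetric co-occurrence counts into
#   prob_relation_matrix[i, j] = R[i, j] / sqrt(R[i, i] * R[j, j]),
# a cosine-style normalization: diagonal entries become 1 and off-diagonal
# entries measure how strongly classes i and j co-occur relative to their
# individual frequencies.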
61 | prob_relation_matrix = np.zeros((dim_, dim_)) 62 | for i in range(dim_): 63 | for j in range(dim_): 64 | prob_relation_matrix[i, j] = relation_matrix[i, j] / ( 65 | np.sqrt(relation_matrix[i, i]) * np.sqrt(relation_matrix[j, j])) 66 | prob_relation_matrix_ba = np.zeros((dim_ + 1, dim_ + 1)) 67 | prob_relation_matrix_ba[1:, 1:] = prob_relation_matrix 68 | print(prob_relation_matrix_ba.shape) 69 | pickle.dump(prob_relation_matrix_ba, open(data_path + 'vg_graph_r.pkl', 'wb')) 70 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def 
voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False): 76 | """rec, prec, ap = voc_eval(detpath, 77 | annopath, 78 | imagesetfile, 79 | classname, 80 | [ovthresh], 81 | [use_07_metric]) 82 | 83 | Top level function that does the PASCAL VOC evaluation. 84 | 85 | detpath: Path to detections 86 | detpath.format(classname) should produce the detection results file. 87 | annopath: Path to annotations 88 | annopath.format(imagename) should be the xml annotations file. 89 | imagesetfile: Text file containing the list of images, one image per line. 90 | classname: Category name (duh) 91 | cachedir: Directory for caching the annotations 92 | [ovthresh]: Overlap threshold (default = 0.5) 93 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 94 | (default False) 95 | """ 96 | # assumes detections are in detpath.format(classname) 97 | # assumes annotations are in annopath.format(imagename) 98 | # assumes imagesetfile is a text file with each line an image name 99 | # cachedir caches the annotations in a pickle file 100 | 101 | # first load gt 102 | if not os.path.isdir(cachedir): 103 | os.mkdir(cachedir) 104 | cachefile = os.path.join(cachedir, 'annots.pkl') 105 | 106 | # read list of images 107 | with open(imagesetfile, 'r') as f: 108 | lines = f.readlines() 109 | imagenames = [x.strip() for x in lines] 110 | 111 | if not os.path.isfile(cachefile): 112 | # load annotations 113 | recs = {} 114 | for i, imagename in enumerate(imagenames): 115 | recs[imagename] = parse_rec(annopath.format(imagename)) 116 | if i % 100 == 0: 117 | print('Reading annotation for {:d}/{:d}'.format( 118 | i + 1, len(imagenames))) 119 | # save 120 | print('Saving cached annotations to {:s}'.format(cachefile)) 121 | with open(cachefile, 'wb') as f: 122 | pickle.dump(recs, f) 123 | else: 124 | # load 125 | with open(cachefile, 'rb') as f: 126 | try: 127 | recs = pickle.load(f) 128 | except: 129 | recs = pickle.load(f, encoding='bytes') 130 | 131 | # extract gt objects for this class 132 | class_recs = {} 133 | npos = 0 134 | for imagename in imagenames: 135 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 136 | bbox = np.array([x['bbox'] for x in R]) 137 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 138 | det = [False] * len(R) 139 | npos = npos + sum(~difficult) 140 | class_recs[imagename] = {'bbox': bbox, 141 | 'difficult': difficult, 142 | 'det': det} 143 | 144 | # read dets 145 |
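# Each line of the per-class detection file is expected to look like
#   <image_id> <confidence> <x1> <y1> <x2> <y2>
# e.g. "000005 0.921 12.0 45.3 310.2 222.8" (an illustrative, made-up line),
# i.e. the format produced by the _write_results_file routines in this repo.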
detfile = detpath.format(classname) 146 | with open(detfile, 'r') as f: 147 | lines = f.readlines() 148 | 149 | if len(lines) == 0: 150 | # No detections for this class 151 | return 0., 0., 0. 152 | 153 | splitlines = [x.strip().split(' ') for x in lines] 154 | image_ids = [x[0] for x in splitlines] 155 | confidence = np.array([float(x[1]) for x in splitlines]) 156 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 157 | 158 | nd = len(image_ids) 159 | tp = np.zeros(nd) 160 | fp = np.zeros(nd) 161 | 162 | if BB.shape[0] > 0: 163 | # sort by descending confidence 164 | sorted_ind = np.argsort(-confidence) 165 | sorted_scores = np.sort(-confidence) 166 | BB = BB[sorted_ind, :] 167 | image_ids = [image_ids[x] for x in sorted_ind] 168 | 169 | # go down dets and mark TPs and FPs 170 | for d in range(nd): 171 | R = class_recs[image_ids[d]] 172 | bb = BB[d, :].astype(float) 173 | ovmax = -np.inf 174 | BBGT = R['bbox'].astype(float) 175 | 176 | if BBGT.size > 0: 177 | # compute overlaps 178 | # intersection 179 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 180 | iymin = np.maximum(BBGT[:, 1], bb[1]) 181 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 182 | iymax = np.minimum(BBGT[:, 3], bb[3]) 183 | iw = np.maximum(ixmax - ixmin + 1., 0.) 184 | ih = np.maximum(iymax - iymin + 1., 0.) 185 | inters = iw * ih 186 | 187 | # union 188 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 189 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 190 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 191 | 192 | overlaps = inters / uni 193 | ovmax = np.max(overlaps) 194 | jmax = np.argmax(overlaps) 195 | 196 | if ovmax > ovthresh: 197 | if not R['difficult'][jmax]: 198 | if not R['det'][jmax]: 199 | tp[d] = 1. 200 | R['det'][jmax] = 1 201 | else: 202 | fp[d] = 1. 203 | else: 204 | fp[d] = 1. 205 | 206 | # compute precision recall 207 | fp = np.cumsum(fp) 208 | tp = np.cumsum(tp) 209 | rec = tp / float(npos) 210 | # avoid divide by zero in case the first detection matches a difficult 211 | # ground truth 212 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 213 | ap = voc_ap(rec, prec, use_07_metric) 214 | 215 | return rec, prec, ap 216 |
-------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CXXFLAGS="-std=c++11" 6 | export CFLAGS="-std=c99" 7 | 8 | python setup.py build_ext --inplace 9 | rm -rf build 10 | 11 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 12 | -gencode arch=compute_35,code=sm_35 \ 13 | -gencode arch=compute_50,code=sm_50 \ 14 | -gencode arch=compute_52,code=sm_52 \ 15 | -gencode arch=compute_60,code=sm_60 \ 16 | -gencode arch=compute_61,code=sm_61 " 17 | # -gencode arch=compute_70,code=sm_70 18 | 19 | # compile NMS 20 | cd model/nms/src 21 | echo "Compiling nms kernels by nvcc..." 22 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 23 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 24 | 25 | cd ../ 26 | python build.py 27 | 28 | # compile roi_pooling 29 | cd ../../ 30 | cd model/roi_pooling/src 31 | echo "Compiling roi pooling kernels by nvcc..." 32 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 33 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 34 | cd ../ 35 | python build.py 36 | 37 | # compile roi_align 38 | cd ../../ 39 | cd model/roi_align/src 40 | echo "Compiling roi align kernels by nvcc..."
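# Same pattern as the kernels above: nvcc pre-compiles the CUDA kernel into an
# object file (-x cu treats the input as CUDA, -Xcompiler -fPIC makes the object
# linkable into the Python extension that build.py assembles afterwards).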
41 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 42 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 43 | cd ../ 44 | python build.py 45 | 46 | # compile roi_crop 47 | cd ../../ 48 | cd model/roi_crop/src 49 | echo "Compiling roi crop kernels by nvcc..." 50 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 51 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 52 | cd ../ 53 | python build.py 54 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | import math 15 | import torchvision.models as models 16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 17 | import pdb 18 | 19 | class vgg16(_fasterRCNN): 20 | def __init__(self, classes, pretrained=False, class_agnostic=False): 21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 22 | self.dout_base_model = 512 23 | self.pretrained = pretrained 24 | self.class_agnostic = class_agnostic 25 | 26 | _fasterRCNN.__init__(self, classes, class_agnostic) 27 | 28 | def _init_modules(self): 29 | vgg = models.vgg16() 30 | if self.pretrained: 31 | print("Loading pretrained weights from %s" %(self.model_path)) 32 | state_dict = torch.load(self.model_path) 33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 34 | 35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 36 | 37 | # not using the last maxpool layer 38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 39 | 40 | # Fix the layers before conv3: 41 | for layer in range(10): 42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 43 | 44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 45 | 46 | self.RCNN_top = vgg.classifier 47 | 48 | # not using the last maxpool layer 49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 50 | 51 | if self.class_agnostic: 52 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 53 | else: 54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 55 | 56 | def _head_to_tail(self, pool5): 57 | 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.RCNN_top(pool5_flat) 60 | 61 | return fc7 62 | 63 | -------------------------------------------------------------------------------- /lib/model/nms/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.cpu().numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] 33 | 34 | return 
torch.IntTensor(keep) 35 | 36 | 37 | 38 | 39 | def nms_cpu_np(dets, thresh): 40 | x1 = dets[:, 0] 41 | y1 = dets[:, 1] 42 | x2 = dets[:, 2] 43 | y2 = dets[:, 3] 44 | scores = dets[:, 4] 45 | 46 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 47 | order = scores.argsort()[::-1] 48 | 49 | keep = [] 50 | while order.size > 0: 51 | i = order.item(0) 52 | keep.append(i) 53 | xx1 = np.maximum(x1[i], x1[order[1:]]) 54 | yy1 = np.maximum(y1[i], y1[order[1:]]) 55 | xx2 = np.minimum(x2[i], x2[order[1:]]) 56 | yy2 = np.minimum(y2[i], y2[order[1:]]) 57 | 58 | w = np.maximum(0.0, xx2 - xx1 + 1) 59 | h = np.maximum(0.0, yy2 - yy1 + 1) 60 | inter = w * h 61 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 62 | 63 | inds = np.where(ovr <= thresh)[0] 64 | order = order[inds + 1] 65 | 66 | return keep 67 | 68 | 69 | 70 | def soft_nms_cpu(dets, threshold=0.001, Nt=0.3, sigma=0.5, method=1):  # sigma is used by the gaussian re-scoring (method == 2) 71 | boxes = dets.cpu().numpy() 72 | N = dets.shape[0] 73 | pos = 0 74 | maxscore = 0 75 | maxpos = 0 76 | 77 | for i in range(N): 78 | maxscore = boxes[i, 4] 79 | maxpos = i 80 | 81 | tx1 = boxes[i,0] 82 | ty1 = boxes[i,1] 83 | tx2 = boxes[i,2] 84 | ty2 = boxes[i,3] 85 | ts = boxes[i,4] 86 | 87 | pos = i + 1 88 | # get max box 89 | while pos < N: 90 | if maxscore < boxes[pos, 4]: 91 | maxscore = boxes[pos, 4] 92 | maxpos = pos 93 | pos = pos + 1 94 | 95 | # add max box as a detection 96 | boxes[i,0] = boxes[maxpos,0] 97 | boxes[i,1] = boxes[maxpos,1] 98 | boxes[i,2] = boxes[maxpos,2] 99 | boxes[i,3] = boxes[maxpos,3] 100 | boxes[i,4] = boxes[maxpos,4] 101 | 102 | # swap ith box with position of max box 103 | boxes[maxpos,0] = tx1 104 | boxes[maxpos,1] = ty1 105 | boxes[maxpos,2] = tx2 106 | boxes[maxpos,3] = ty2 107 | boxes[maxpos,4] = ts 108 | 109 | tx1 = boxes[i,0] 110 | ty1 = boxes[i,1] 111 | tx2 = boxes[i,2] 112 | ty2 = boxes[i,3] 113 | ts = boxes[i,4] 114 | 115 | pos = i + 1 116 | # NMS iterations, note that N changes if detection boxes fall below threshold 117 | while pos < N: 118 | x1 = boxes[pos, 0] 119 | y1 = boxes[pos, 1] 120 | x2 = boxes[pos, 2] 121 | y2 = boxes[pos, 3] 122 | s = boxes[pos, 4] 123 | 124 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 125 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 126 | if iw > 0: 127 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 128 | if ih > 0: 129 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 130 | ov = iw * ih / ua  # iou between max box and detection box 131 | 132 | if method == 1: # linear 133 | if ov > Nt: 134 | weight = 1 - ov 135 | else: 136 | weight = 1 137 | elif method == 2: # gaussian 138 | weight = np.exp(-(ov * ov) / sigma) 139 | else: # original NMS 140 | if ov > Nt: 141 | weight = 0 142 | else: 143 | weight = 1 144 | 145 | boxes[pos, 4] = weight * boxes[pos, 4] 146 | 147 | # if box score falls below threshold, discard the box by swapping with last box 148 | # update N 149 | if boxes[pos, 4] < threshold: 150 | boxes[pos,0] = boxes[N-1, 0] 151 | boxes[pos,1] = boxes[N-1, 1] 152 | boxes[pos,2] = boxes[N-1, 2] 153 | boxes[pos,3] = boxes[N-1, 3] 154 | boxes[pos,4] = boxes[N-1, 4] 155 | N = N - 1 156 | pos = pos - 1 157 | 158 | pos = pos + 1 159 | 160 | keep = [i for i in range(N)] 161 | return keep, boxes 162 | 163 | 164 | def nms_domain(dets, dets_small, thresh_small=0.85, thresh_big=0.5): 165 | # dets = dets.cpu().numpy() 166 | # dets_small = dets_small.cpu().numpy() 167 | x1 = dets[:, 0] 168 | y1 = dets[:, 1] 169 | x2 = dets[:, 2] 170 | y2 = dets[:, 3] 171 | scores = dets[:, 4] 172 | 173 | x21 = dets_small[:, 0] 174 | y21 = dets_small[:, 1] 175 | x22 = dets_small[:, 2] 176 | y22
= dets_small[:, 3] 177 | scores2 = dets_small[:, 4] 178 | 179 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 180 | order = scores.argsort()[::-1] 181 | 182 | areas2 = (x22 - x21 + 1) * (y22 - y21 + 1) 183 | order2 = scores2.argsort()[::-1] 184 | 185 | throw = set() 186 | keep = set(list(range(len(dets_small)))) 187 | for i in range(len(dets)): 188 | xx1 = np.maximum(x1[i], x21) 189 | yy1 = np.maximum(y1[i], y21) 190 | xx2 = np.minimum(x2[i], x22) 191 | yy2 = np.minimum(y2[i], y22) 192 | 193 | w = np.maximum(0.0, xx2 - xx1 + 1) 194 | h = np.maximum(0.0, yy2 - yy1 + 1) 195 | inter = w * h 196 | 197 | ovr_1 = inter / (areas[i]) 198 | ovr_2 = inter / (areas2) 199 | 200 | throw_array = np.where((ovr_2 > thresh_small) & (ovr_1 < thresh_big))[0].tolist() 201 | throw.update(throw_array) 202 | keep = list(keep - throw) 203 | return keep 204 | 205 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 |
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 |
-------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_cpu(dets, thresh) if force_cpu else nms_gpu(dets, thresh) 22 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdio.h> 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include <stdio.h> 9 | #include <iostream> 10 | #include <vector> 11 | #include <cstring> 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) << std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS);
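// The "+ 1" terms above follow the inclusive integer pixel-coordinate
// convention used throughout this code: a box spanning columns x1..x2 is
// (x2 - x1 + 1) pixels wide. For example, a = [0,0,9,9] and b = [5,5,14,14]
// give interS = 5 * 5 = 25, union = 100 + 100 - 25 = 175, IoU ~ 0.143.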
39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to create a memory for keep_out on cpu 127 | // otherwise, the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // original: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks;
j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/src/nms_cuda_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, 
fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | raise NotImplementedError 30 | 31 | return output 32 | 33 | def backward(self, grad_output): 34 | assert(self.feature_size is not None and grad_output.is_cuda) 35 | 36 | batch_size, num_channels, data_height, data_width = self.feature_size 37 | 38 | grad_input = self.rois.new(batch_size, num_channels, data_height, 39 | data_width).zero_() 40 | roi_align.roi_align_backward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, grad_output, 43 | self.rois, grad_input) 44 | 45 | # print grad_input 46 | 47 | return grad_input, None 48 | -------------------------------------------------------------------------------- /lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class 
RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width,
num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, 16 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) { 17 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 18 | // (n, c, ph, pw) is an element in the aligned output 19 | // int n = index; 20 | // int pw = n % aligned_width; 21 | // n /= aligned_width; 22 | // int ph = n % aligned_height; 23 | // n /= aligned_height; 24 | // int c = n % channels; 25 | // n /= channels; 26 | 27 | int pw = index % aligned_width; 28 | int ph = (index / aligned_width) % aligned_height; 29 | int c = (index / aligned_width / aligned_height) % channels; 30 | int n = index / aligned_width / aligned_height / channels; 31 | 32 | // bottom_rois += n * 5; 33 | float roi_batch_ind = bottom_rois[n * 5 + 0]; 34 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale; 35 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale; 36 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale; 37 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale; 38 | 39 | // Force malformed ROIs to be 1x1 40 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 41 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 42 | float bin_size_h = roi_height / (aligned_height - 1.); 43 | float bin_size_w = roi_width / (aligned_width - 1.); 44 | 45 | float h = (float)(ph) * bin_size_h + roi_start_h; 46 | float w = (float)(pw) * bin_size_w + roi_start_w; 47 | 48 | int hstart = fminf(floor(h), height - 2); 49 | int wstart = fminf(floor(w), width - 2); 50 | 51 | int img_start = roi_batch_ind * channels * height * width; 52 | 53 | // bilinear interpolation 54 | if (h < 0 || h >= height || w < 0 || w >= width) { 55 | top_data[index] = 0.; 56 | } else { 57 | float h_ratio = h - (float)(hstart); 58 | float w_ratio = w - (float)(wstart); 59 | int upleft = img_start + (c * height + hstart) * width + wstart; 60 | int upright = upleft + 1; 61 | int downleft = upleft + width; 62 | int downright = downleft + 1; 63 | 64 | top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio) 65 | + bottom_data[upright] * (1. - h_ratio) * w_ratio 66 | + bottom_data[downleft] * h_ratio * (1.
- w_ratio) 67 | + bottom_data[downright] * h_ratio * w_ratio; 68 | } 69 | } 70 | } 71 | 72 | 73 | int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, 74 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) { 75 | const int kThreadsPerBlock = 1024; 76 | const int output_size = num_rois * aligned_height * aligned_width * channels; 77 | cudaError_t err; 78 | 79 | 80 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 81 | output_size, bottom_data, spatial_scale, height, width, channels, 82 | aligned_height, aligned_width, bottom_rois, top_data); 83 | 84 | err = cudaGetLastError(); 85 | if(cudaSuccess != err) { 86 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 87 | exit( -1 ); 88 | } 89 | 90 | return 1; 91 | } 92 | 93 | 94 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, 95 | const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) { 96 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 97 | 98 | // (n, c, ph, pw) is an element in the aligned output 99 | int pw = index % aligned_width; 100 | int ph = (index / aligned_width) % aligned_height; 101 | int c = (index / aligned_width / aligned_height) % channels; 102 | int n = index / aligned_width / aligned_height / channels; 103 | 104 | float roi_batch_ind = bottom_rois[n * 5 + 0]; 105 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale; 106 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale; 107 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale; 108 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale; 109 | /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */ 110 | /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */ 111 | /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */ 112 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */ 113 | 114 | // Force malformed ROIs to be 1x1 115 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 116 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 117 | float bin_size_h = roi_height / (aligned_height - 1.); 118 | float bin_size_w = roi_width / (aligned_width - 1.); 119 | 120 | float h = (float)(ph) * bin_size_h + roi_start_h; 121 | float w = (float)(pw) * bin_size_w + roi_start_w; 122 | 123 | int hstart = fminf(floor(h), height - 2); 124 | int wstart = fminf(floor(w), width - 2); 125 | 126 | int img_start = roi_batch_ind * channels * height * width; 127 | 128 | // bilinear interpolation 129 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 130 | float h_ratio = h - (float)(hstart); 131 | float w_ratio = w - (float)(wstart); 132 | int upleft = img_start + (c * height + hstart) * width + wstart; 133 | int upright = upleft + 1; 134 | int downleft = upleft + width; 135 | int downright = downleft + 1; 136 | 137 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio)); 138 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. 
- h_ratio) * w_ratio); 139 | atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio)); 140 | atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio); 141 | } 142 | } 143 | } 144 | 145 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, 146 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) { 147 | const int kThreadsPerBlock = 1024; 148 | const int output_size = num_rois * aligned_height * aligned_width * channels; 149 | cudaError_t err; 150 | 151 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 152 | output_size, top_diff, spatial_scale, height, width, channels, 153 | aligned_height, aligned_width, bottom_diff, bottom_rois); 154 | 155 | err = cudaGetLastError(); 156 | if(cudaSuccess != err) { 157 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 158 | exit( -1 ); 159 | } 160 | 161 | return 1; 162 | } 163 | 164 | 165 | #ifdef __cplusplus 166 | } 167 | #endif 168 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/src/roi_align_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- 
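Taken together, the roi_align package above exposes three Module wrappers over the same CUDA kernel: RoIAlign samples an aligned_height x aligned_width grid directly, while RoIAlignAvg and RoIAlignMax ask the kernel for one extra row and column and then reduce with a stride-1 avg/max pool, which is why the +1 appears in their forward methods. A minimal usage sketch (CUDA-only, since the CPU branch raises NotImplementedError; the stride-16 spatial_scale, the tensor shapes, and having lib/ on sys.path are illustrative assumptions, not fixed by the code above):

    import torch
    from model.roi_align.modules.roi_align import RoIAlignAvg

    align = RoIAlignAvg(7, 7, 1.0 / 16.0)          # 7x7 bins over a stride-16 feature map
    features = torch.randn(1, 512, 38, 50).cuda()  # (B, C, H, W) backbone features
    # one RoI per row: (batch_index, x1, y1, x2, y2) in input-image coordinates
    rois = torch.cuda.FloatTensor([[0, 48, 48, 320, 240]])
    pooled = align(features, rois)                 # -> (num_rois, 512, 7, 7)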
/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('device %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 =
torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward device %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width, lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid # torch tensors have no astype(); copy the precomputed base grid into each batch slot 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert
output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdbool.h> 3 | #include <stdio.h> 4 | #include "roi_crop_cuda_kernel.h" 5 | 6 | #define real float 7 | 8 | // this symbol will be resolved automatically from PyTorch libs 9 | extern THCState *state; 10 | 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 12 | // we assume BHWD format in inputImages 13 | // we assume BHW(YX) format on grids 14 | 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){ 16 | // THCState *state = getCutorchState(L); 17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 20 | 21 | int success = 0; 22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1], 23 | output->size[3], 24 | output->size[2], 25 | output->size[0], 26 | THCudaTensor_size(state, inputImages, 1), 27 | THCudaTensor_size(state,
inputImages, 2), 28 | THCudaTensor_size(state, inputImages, 3), 29 | THCudaTensor_size(state, inputImages, 0), 30 | THCudaTensor_data(state, inputImages), 31 | THCudaTensor_stride(state, inputImages, 0), 32 | THCudaTensor_stride(state, inputImages, 1), 33 | THCudaTensor_stride(state, inputImages, 2), 34 | THCudaTensor_stride(state, inputImages, 3), 35 | THCudaTensor_data(state, grids), 36 | THCudaTensor_stride(state, grids, 0), 37 | THCudaTensor_stride(state, grids, 3), 38 | THCudaTensor_stride(state, grids, 1), 39 | THCudaTensor_stride(state, grids, 2), 40 | THCudaTensor_data(state, output), 41 | THCudaTensor_stride(state, output, 0), 42 | THCudaTensor_stride(state, output, 1), 43 | THCudaTensor_stride(state, output, 2), 44 | THCudaTensor_stride(state, output, 3), 45 | THCState_getCurrentStream(state)); 46 | 47 | //check for errors 48 | if (!success) { 49 | THError("aborting"); 50 | } 51 | return 1; 52 | } 53 | 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput) 56 | { 57 | // THCState *state = getCutorchState(L); 58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor"); 62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor"); 63 | 64 | int success = 0; 65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1], 66 | gradOutput->size[3], 67 | gradOutput->size[2], 68 | gradOutput->size[0], 69 | THCudaTensor_size(state, inputImages, 1), 70 | THCudaTensor_size(state, inputImages, 2), 71 | THCudaTensor_size(state, inputImages, 3), 72 | THCudaTensor_size(state, inputImages, 0), 73 | THCudaTensor_data(state, inputImages), 74 | THCudaTensor_stride(state, inputImages, 0), 75 | THCudaTensor_stride(state, inputImages, 1), 76 | THCudaTensor_stride(state, inputImages, 2), 77 | THCudaTensor_stride(state, inputImages, 3), 78 | THCudaTensor_data(state, grids), 79 | THCudaTensor_stride(state, grids, 0), 80 | THCudaTensor_stride(state, grids, 3), 81 | THCudaTensor_stride(state, grids, 1), 82 | THCudaTensor_stride(state, grids, 2), 83 | THCudaTensor_data(state, gradInputImages), 84 | THCudaTensor_stride(state, gradInputImages, 0), 85 | THCudaTensor_stride(state, gradInputImages, 1), 86 | THCudaTensor_stride(state, gradInputImages, 2), 87 | THCudaTensor_stride(state, gradInputImages, 3), 88 | THCudaTensor_data(state, gradGrids), 89 | THCudaTensor_stride(state, gradGrids, 0), 90 | THCudaTensor_stride(state, gradGrids, 3), 91 | THCudaTensor_stride(state, gradGrids, 1), 92 | THCudaTensor_stride(state, gradGrids, 2), 93 | THCudaTensor_data(state, gradOutput), 94 | THCudaTensor_stride(state, gradOutput, 0), 95 | THCudaTensor_stride(state, gradOutput, 1), 96 | THCudaTensor_stride(state, gradOutput, 2), 97 | THCudaTensor_stride(state, gradOutput, 3), 98 | THCState_getCurrentStream(state)); 99 | 100 | //check for errors 101 | if (!success) { 102 | THError("aborting"); 103 | } 104 | return 1; 105 | } 106 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // 
Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/_ext/__init__.py 
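Before the roi_pooling sources below: the roi_crop package just listed is a spatial-transformer-style bilinear sampler rather than an RoI pooler. Per the header comments it expects sampling grids in BHW(YX) format with coordinates normalized to [-1, 1]; note, though, that the BHWD comments and the size/stride arguments actually passed in roi_crop_cuda.c do not line up cleanly, so treat the layout in this shape-level sketch as an assumption read off the Python wrapper, not a tested contract:

    import torch
    from model.roi_crop.modules.roi_crop import _RoICrop

    crop = _RoICrop()
    feats = torch.randn(2, 512, 14, 14).cuda()  # the wrapper indexes dim 1 as channels
    grid = torch.zeros(2, 7, 7, 2).cuda()       # (B, H_out, W_out, (y, x)) in [-1, 1]
    out = crop(feats, grid)                     # -> (2, 512, 7, 7) bilinear crops

In the detector such a grid would come from an affine grid generator driven by the RoI coordinates (see functions/gridgen.py above).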
-------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_pooling_cuda.c'] 15 | headers += ['src/roi_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_pooling.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and 
grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Initialize all output elements to -1 (a stand-in for -inf before the per-bin maxima below; safe for non-negative ReLU features).
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/src/roi_pooling.cu.o -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_pooling_kernel.h"
4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- 
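The kernel header below completes the roi_pooling package: classic quantized max pooling over each RoI, with the argmax indices cached on the forward pass so the backward pass can scatter gradients (unlike roi_align, which re-derives its bilinear weights). Usage mirrors the RoIAlign sketch earlier, with the same illustrative shapes and stride-16 assumption:

    import torch
    from model.roi_pooling.modules.roi_pool import _RoIPooling

    pool = _RoIPooling(7, 7, 1.0 / 16.0)
    features = torch.randn(1, 512, 38, 50).cuda()
    rois = torch.cuda.FloatTensor([[0, 48, 48, 320, 240]])
    pooled = pool(features, rois)               # -> (num_rois, 512, 7, 7)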
/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import pdb 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | #array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | try: 40 | xrange # Python 2 41 | except NameError: 42 | xrange = range # Python 3 43 | 44 | 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 46 | scales=2**np.arange(3, 6)): 47 | """ 48 | Generate anchor (reference) windows by enumerating aspect ratios X 49 | scales wrt a reference (0, 0, 15, 15) window. 50 | """ 51 | 52 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 53 | ratio_anchors = _ratio_enum(base_anchor, ratios) 54 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 55 | for i in xrange(ratio_anchors.shape[0])]) 56 | return anchors 57 | 58 | def _whctrs(anchor): 59 | """ 60 | Return width, height, x center, and y center for an anchor (window). 
61 | """ 62 | 63 | w = anchor[2] - anchor[0] + 1 64 | h = anchor[3] - anchor[1] + 1 65 | x_ctr = anchor[0] + 0.5 * (w - 1) 66 | y_ctr = anchor[1] + 0.5 * (h - 1) 67 | return w, h, x_ctr, y_ctr 68 | 69 | def _mkanchors(ws, hs, x_ctr, y_ctr): 70 | """ 71 | Given a vector of widths (ws) and heights (hs) around a center 72 | (x_ctr, y_ctr), output a set of anchors (windows). 73 | """ 74 | 75 | ws = ws[:, np.newaxis] 76 | hs = hs[:, np.newaxis] 77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 78 | y_ctr - 0.5 * (hs - 1), 79 | x_ctr + 0.5 * (ws - 1), 80 | y_ctr + 0.5 * (hs - 1))) 81 | return anchors 82 | 83 | def _ratio_enum(anchor, ratios): 84 | """ 85 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | size = w * h 90 | size_ratios = size / ratios 91 | ws = np.round(np.sqrt(size_ratios)) 92 | hs = np.round(ws * ratios) 93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 94 | return anchors 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | if __name__ == '__main__': 108 | import time 109 | t = time.time() 110 | a = generate_anchors() 111 | print(time.time() - t) 112 | print(a) 113 | from IPython import embed; embed() 114 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from model.nms.nms_wrapper import nms 21 | 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it euqal over batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | for i in range(batch_size): 129 | # # 3. remove predicted boxes with either height or width < threshold 130 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 131 | proposals_single = proposals_keep[i] 132 | scores_single = scores_keep[i] 133 | 134 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 135 | # # 5. take top pre_nms_topN (e.g. 6000) 136 | order_single = order[i] 137 | 138 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 139 | order_single = order_single[:pre_nms_topN] 140 | 141 | proposals_single = proposals_single[order_single, :] 142 | scores_single = scores_single[order_single].view(-1,1) 143 | 144 | # 6. apply nms (e.g. threshold = 0.7) 145 | # 7. take after_nms_topN (e.g. 300) 146 | # 8. return the top proposals (-> RoIs top) 147 | 148 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh) 149 | keep_idx_i = keep_idx_i.long().view(-1) 150 | 151 | if post_nms_topN > 0: 152 | keep_idx_i = keep_idx_i[:post_nms_topN] 153 | proposals_single = proposals_single[keep_idx_i, :] 154 | scores_single = scores_single[keep_idx_i, :] 155 | 156 | # padding 0 at the end. 
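# NOTE (annotation): 'output' was preallocated above as zeros of shape
# (batch_size, post_nms_topN, 5), so rows past num_proposal simply remain zero;
# column 0 carries the image's batch index for the downstream RoI layers.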
157 | num_proposal = proposals_single.size(0) 158 | output[i,:,0] = i 159 | output[i,:num_proposal,1:] = proposals_single 160 | 161 | return output 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | def _filter_boxes(self, boxes, min_size): 172 | """Remove all boxes with any side smaller than min_size.""" 173 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 174 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 175 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 176 | return keep 177 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer_region.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from model.nms.nms_wrapper import nms 21 | 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it equal across the batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | output_cls_score = scores.new(batch_size, post_nms_topN, 2).zero_() 129 | for i in range(batch_size): 130 | # # 3. remove predicted boxes with either height or width < threshold 131 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 132 | proposals_single = proposals_keep[i] 133 | scores_single = scores_keep[i] 134 | 135 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 136 | # # 5. take top pre_nms_topN (e.g. 6000) 137 | order_single = order[i] 138 | 139 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 140 | order_single = order_single[:pre_nms_topN] 141 | 142 | proposals_single = proposals_single[order_single, :] 143 | scores_single = scores_single[order_single].view(-1,1) 144 | 145 | # 6. apply nms (e.g. threshold = 0.7) 146 | # 7. take after_nms_topN (e.g. 300) 147 | # 8. return the top proposals (-> RoIs top) 148 | 149 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh) 150 | keep_idx_i = keep_idx_i.long().view(-1) 151 | 152 | if post_nms_topN > 0: 153 | keep_idx_i = keep_idx_i[:post_nms_topN] 154 | proposals_single = proposals_single[keep_idx_i, :] 155 | scores_single = scores_single[keep_idx_i, :] 156 | 157 | # pad with zeros at the end.
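# As in proposal_layer.py, rows beyond num_proposal stay zero-padded; the
# region variant additionally returns the post-NMS objectness score of each
# kept box in output_cls_score[i, :, 1], with the batch index in column 0.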
158 | num_proposal = proposals_single.size(0) 159 | output[i,:,0] = i 160 | output[i,:num_proposal,1:] = proposals_single 161 | output_cls_score[i,:,0] = i 162 | output_cls_score[i,:num_proposal,1] = scores_single 163 | 164 | return output, output_cls_score 165 | 166 | def backward(self, top, propagate_down, bottom): 167 | """This layer does not propagate gradients.""" 168 | pass 169 | 170 | def reshape(self, bottom, top): 171 | """Reshaping happens during the call to forward.""" 172 | pass 173 | 174 | def _filter_boxes(self, boxes, min_size): 175 | """Remove all boxes with any side smaller than min_size.""" 176 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 177 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 178 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 179 | return keep 180 | -------------------------------------------------------------------------------- /lib/model/rpn/rpn_region.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | from model.utils.config import cfg 8 | from .proposal_layer_region import _ProposalLayer 9 | from .anchor_target_layer import _AnchorTargetLayer 10 | from model.utils.net_utils import _smooth_l1_loss 11 | 12 | import numpy as np 13 | import math 14 | import pdb 15 | import time 16 | 17 | class _RPN(nn.Module): 18 | """ region proposal network """ 19 | def __init__(self, din): 20 | super(_RPN, self).__init__() 21 | 22 | self.din = din # get depth of input feature map, e.g., 512 23 | self.anchor_scales = cfg.ANCHOR_SCALES 24 | self.anchor_ratios = cfg.ANCHOR_RATIOS 25 | self.feat_stride = cfg.FEAT_STRIDE[0] 26 | 27 | # define the convrelu layers processing input feature map 28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 29 | 30 | # define bg/fg classification score layer 31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors) 32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) 33 | 34 | # define anchor box offset prediction layer 35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors) 36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0) 37 | 38 | # define proposal layer 39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 40 | 41 | # define anchor target layer 42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 43 | 44 | self.rpn_loss_cls = 0 45 | self.rpn_loss_box = 0 46 | 47 | @staticmethod 48 | def reshape(x, d): 49 | input_shape = x.size() 50 | x = x.view( 51 | input_shape[0], 52 | int(d), 53 | int(float(input_shape[1] * input_shape[2]) / float(d)), 54 | input_shape[3] 55 | ) 56 | return x 57 | 58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes): 59 | 60 | batch_size = base_feat.size(0) 61 | 62 | # return feature map after convrelu layer 63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) 64 | # get rpn classification score 65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1) 66 | 67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape,dim=1) 69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 70 | 71 | # get rpn offsets to the anchor boxes 72 | rpn_bbox_pred = 
self.RPN_bbox_pred(rpn_conv1) 73 | 74 | # proposal layer 75 | cfg_key = 'TRAIN' if self.training else 'TEST' 76 | 77 | rois, output_cls_score= self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, 78 | im_info, cfg_key)) 79 | 80 | self.rpn_loss_cls = 0 81 | self.rpn_loss_box = 0 82 | 83 | # generating training labels and build the rpn loss 84 | if self.training: 85 | assert gt_boxes is not None 86 | 87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes)) 88 | 89 | # compute classification loss 90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) 91 | rpn_label = rpn_data[0].view(batch_size, -1) 92 | 93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) 94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep) 95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data) 96 | rpn_label = Variable(rpn_label.long()) 97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) 98 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 99 | 100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:] 101 | 102 | # compute bbox regression loss 103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) 104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) 105 | rpn_bbox_targets = Variable(rpn_bbox_targets) 106 | 107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, 108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3]) 109 | 110 | return rois, output_cls_score, self.rpn_loss_cls, self.rpn_loss_box 111 | 112 | 113 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | """ 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | 
cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 
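For example, inputs of shapes (480, 640, 3) and (600, 800, 3) yield a blob of shape (2, 600, 800, 3); each image is copied into the top-left corner and the remaining entries stay zero.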
24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criterion above for the iou. 67 | #
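# A minimal sketch (not part of the toolbox) of how the RLE counts above are
# produced for a binary vector; it reproduces the M=[0 0 1 1 1 0 1] -> [2 3 1 1]
# and M=[1 1 1 1 1 1 0] -> [0 6 1] examples (a leading zero count is emitted
# whenever the vector starts with a one):
#
#   def rle_counts(m):
#       counts, prev, run = [], 0, 0
#       for v in m:
#           if v == prev:
#               run += 1
#           else:
#               counts.append(run)
#               prev, run = v, 1
#       counts.append(run)
#       return counts   # rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]
#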
68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | # import numpy.random as npr 15 | from scipy.misc import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | 20 | def get_minibatch(roidb, num_classes, random_scale_inds): 21 | """Given a roidb, construct a minibatch sampled from it.""" 22 | num_images = len(roidb) 23 | # Sample random scales to use for each image in this batch 24 | # random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 25 | # size=num_images) 26 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 27 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 28 | format(num_images, cfg.TRAIN.BATCH_SIZE) 29 | 30 | # Get the input image blob, formatted for caffe 31 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 32 | 33 | blobs = {'data': im_blob} 34 | 35 | assert len(im_scales) == 1, "Single batch only" 36 | assert len(roidb) == 1, "Single batch only" 37 | 38 | # gt boxes: (x1, y1, x2, y2, cls) 39 | if cfg.TRAIN.USE_ALL_GT: 40 | # Include all ground truth boxes 41 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 42 | else: 43 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' (parentheses required: '&' binds tighter than '!=') 44 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 45 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 46 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 47 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 48 | blobs['gt_boxes'] = gt_boxes 49 | blobs['im_info'] = np.array( 50 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 51 | dtype=np.float32) 52 | 53 | blobs['img_id'] = roidb[0]['img_id'] 54 | 55 | return blobs 56 | 57 | def _get_image_blob(roidb, scale_inds): 58 | """Builds an input blob from the images in the roidb at the specified 59 | scales.
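Each image is rescaled by prep_im_for_blob so that its shorter side matches the sampled TRAIN.SCALES entry; for example, a 375x500 image with target size 600 gets im_scale 1.6 and becomes 600x800.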
60 | """ 61 | num_images = len(roidb) 62 | 63 | processed_ims = [] 64 | im_scales = [] 65 | for i in range(num_images): 66 | #im = cv2.imread(roidb[i]['image']) # BGR 67 | im = imread(roidb[i]['image']) # RGB 68 | 69 | # tile channels for 1-channel images 70 | if len(im.shape) == 2: 71 | im = im[:, :, np.newaxis] 72 | im = np.concatenate((im, im, im), axis=2) 73 | 74 | # drop the last channel for 4-channel images 75 | if im.shape[-1] == 4: 76 | im = im[:, :, :-1] 77 | 78 | # rgb -> bgr 79 | im = im[:, :, ::-1] 80 | 81 | if roidb[i]['flipped']: 82 | im = im[:, ::-1, :] 83 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 84 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) 85 | im_scales.append(im_scale) 86 | processed_ims.append(im) 87 | 88 | # Create a blob to hold the input images 89 | blob = im_list_to_blob(processed_ims) 90 | 91 | return blob, im_scales 92 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | from os.path import join as pjoin 11 | import numpy as np 12 | from distutils.core import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | 16 | 17 | def find_in_path(name, path): 18 | "Find a file in a search path" 19 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | # def locate_cuda(): 28 | # """Locate the CUDA environment on the system 29 | # 30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | # and values giving the absolute path to each directory. 32 | # 33 | # Starts by looking for the CUDAHOME env variable. If not found, everything 34 | # is based on finding 'nvcc' in the PATH. 35 | # """ 36 | # 37 | # # first check if the CUDAHOME env variable is in use 38 | # if 'CUDAHOME' in os.environ: 39 | # home = os.environ['CUDAHOME'] 40 | # nvcc = pjoin(home, 'bin', 'nvcc') 41 | # else: 42 | # # otherwise, search the PATH for NVCC 43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | # if nvcc is None: 46 | # raise EnvironmentError('The nvcc binary could not be ' 47 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | # home = os.path.dirname(os.path.dirname(nvcc)) 49 | # 50 | # cudaconfig = {'home': home, 'nvcc': nvcc, 51 | # 'include': pjoin(home, 'include'), 52 | # 'lib64': pjoin(home, 'lib64')} 53 | # for k, v in cudaconfig.iteritems(): 54 | # if not os.path.exists(v): 55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | # 57 | # return cudaconfig 58 | 59 | 60 | # CUDA = locate_cuda() 61 | 62 | # Obtain the numpy include directory. This logic works across numpy versions. 
63 | try: 64 | numpy_include = np.get_include() 65 | except AttributeError: 66 | numpy_include = np.get_numpy_include() 67 | 68 | 69 | def customize_compiler_for_nvcc(self): 70 | """inject deep into distutils to customize how the dispatch 71 | to gcc/nvcc works. 72 | 73 | If you subclass UnixCCompiler, it's not trivial to get your subclass 74 | injected in, and still have the right customizations (i.e. 75 | distutils.sysconfig.customize_compiler) run on it. So instead of going 76 | the OO route, I have this. Note, it's kind of like a weird functional 77 | subclassing going on.""" 78 | 79 | # tell the compiler it can process .cu 80 | self.src_extensions.append('.cu') 81 | 82 | # save references to the default compiler_so and _compile methods 83 | default_compiler_so = self.compiler_so 84 | super = self._compile 85 | 86 | # now redefine the _compile method. This gets executed for each 87 | # object but distutils doesn't have the ability to change compilers 88 | # based on source extension: we add it. 89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 90 | print(extra_postargs) 91 | if os.path.splitext(src)[1] == '.cu': 92 | # use the cuda for .cu files 93 | self.set_executable('compiler_so', CUDA['nvcc']) 94 | # use only a subset of the extra_postargs, which are 1-1 translated 95 | # from the extra_compile_args in the Extension class 96 | postargs = extra_postargs['nvcc'] 97 | else: 98 | postargs = extra_postargs['gcc'] 99 | 100 | super(obj, src, ext, cc_args, postargs, pp_opts) 101 | # reset the default compiler_so, which we might have changed for cuda 102 | self.compiler_so = default_compiler_so 103 | 104 | # inject our redefined _compile method into the class 105 | self._compile = _compile 106 | 107 | 108 | # run the customize_compiler 109 | class custom_build_ext(build_ext): 110 | def build_extensions(self): 111 | customize_compiler_for_nvcc(self.compiler) 112 | build_ext.build_extensions(self) 113 | 114 | 115 | ext_modules = [ 116 | Extension( 117 | "model.utils.cython_bbox", 118 | ["model/utils/bbox.pyx"], 119 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 120 | include_dirs=[numpy_include] 121 | ), 122 | Extension( 123 | 'pycocotools._mask', 124 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 125 | include_dirs=[numpy_include, 'pycocotools'], 126 | extra_compile_args={ 127 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 128 | ), 129 | ] 130 | 131 | setup( 132 | name='faster_rcnn', 133 | ext_modules=ext_modules, 134 | # inject our custom trigger 135 | cmdclass={'build_ext': custom_build_ext}, 136 | ) 137 | -------------------------------------------------------------------------------- /run/finetune_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 10 30 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset coco \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 0 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_coco_TFA.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 30 7 | do 8 | # few-shot fine-tuning 9 | 
CUDA_VISIBLE_DEVICES=3 python train.py --dataset coco \ 10 | --epochs 30 --bs 4 --nw 8 --log_dir checkpoint --save_dir $base_dir \ 11 | --r True --checksession 200 --checkepoch 20 --meta_type 0 --shots $j --phase 2 \ 12 | --meta_train True --meta_loss True --TFA True 13 | 14 | CUDA_VISIBLE_DEVICES=3 python test.py --dataset coco \ 15 | --load_dir $base_dir --meta_type 0 \ 16 | --checksession $j --checkepoch 29 --shots $j \ 17 | --phase 2 --meta_test True --meta_loss True 18 | done -------------------------------------------------------------------------------- /run/finetune_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 1 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_voc_second.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 2 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 3 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/test_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 10 30 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset coco \ 10 | --load_dir $base_dir --meta_type 0 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done -------------------------------------------------------------------------------- /run/test_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 1 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/test_voc_second.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 2 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/test_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 3 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/train_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # base class training 6 | python train.py --dataset coco \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 0 --meta_train True --meta_loss True 10 | -------------------------------------------------------------------------------- /run/train_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 1 --meta_train True --meta_loss True -------------------------------------------------------------------------------- /run/train_voc_second.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 2 --meta_train True --meta_loss True -------------------------------------------------------------------------------- /run/train_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 3 --meta_train True --meta_loss True 10 | --------------------------------------------------------------------------------
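Taken together, the run/ scripts form a three-stage protocol per split: base-class training, few-shot fine-tuning over the listed shot counts, and evaluation of the resulting checkpoints. For the first VOC split, that means running bash run/train_voc_first.sh (base training, 21 epochs, meta_type 1), then bash run/finetune_voc_first.sh (fine-tuning for 1/2/3/5/10 shots with --phase 2), then bash run/test_voc_first.sh (evaluation of each shot setting); the COCO and other VOC splits follow the same pattern with their corresponding scripts.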