├── .gitignore ├── LICENSE ├── README.md ├── _init_paths.py ├── cfgs ├── res101_ms.yml └── res50.yml ├── data ├── README.md └── VOCsplits │ ├── VOC2007 │ ├── train_first_split.txt │ ├── train_second_split.txt │ ├── train_third_split.txt │ ├── voc_2007_train_first_split.txt │ ├── voc_2007_train_second_split.txt │ └── voc_2007_train_third_split.txt │ └── VOC2012 │ ├── train_first_split.txt │ ├── train_second_split.txt │ ├── train_third_split.txt │ ├── voc_2012_train_first_split.txt │ ├── voc_2012_train_second_split.txt │ └── voc_2012_train_third_split.txt ├── download_models.sh ├── img └── PipelineDet.png ├── lib ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── custom.py │ ├── custom_eval.py │ ├── custom_metadata.py │ ├── ds_utils.py │ ├── factory.py │ ├── imdb.py │ ├── metadata.py │ ├── metadata_3d.py │ ├── metadata_TFA.py │ ├── metadata_coco.py │ ├── objectnet3d.py │ ├── pascal3d.py │ ├── pascal3d_eval.py │ ├── pascal_voc.py │ ├── pascal_voc_rbg.py │ ├── tools │ │ ├── compute_prior.py │ │ └── mcg_munge.py │ └── voc_eval.py ├── make.sh ├── model │ ├── __init__.py │ ├── faster_rcnn │ │ ├── __init__.py │ │ ├── faster_rcnn.py │ │ ├── resnet.py │ │ ├── trail.py │ │ └── vgg16.py │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── nms │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── nms_cpu.py │ │ ├── nms_gpu.py │ │ ├── nms_kernel.cu │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cuda.c │ │ │ ├── nms_cuda.h │ │ │ ├── nms_cuda_kernel.cu │ │ │ ├── nms_cuda_kernel.cu.o │ │ │ └── nms_cuda_kernel.h │ ├── roi_align │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_align │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.c │ │ │ ├── roi_align_cuda.h │ │ │ ├── roi_align_kernel.cu │ │ │ ├── roi_align_kernel.cu.o │ │ │ └── roi_align_kernel.h │ ├── roi_crop │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_crop │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ ├── crop_resize.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── gridgen.py │ │ │ └── roi_crop.py │ │ └── src │ │ │ ├── roi_crop.c │ │ │ ├── roi_crop.h │ │ │ ├── roi_crop_cuda.c │ │ │ ├── roi_crop_cuda.h │ │ │ ├── roi_crop_cuda_kernel.cu │ │ │ ├── roi_crop_cuda_kernel.cu.o │ │ │ └── roi_crop_cuda_kernel.h │ ├── roi_pooling │ │ ├── __init__.py │ │ ├── _ext │ │ │ ├── __init__.py │ │ │ └── roi_pooling │ │ │ │ └── __init__.py │ │ ├── build.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ └── src │ │ │ ├── roi_pooling.c │ │ │ ├── roi_pooling.cu.o │ │ │ ├── roi_pooling.h │ │ │ ├── roi_pooling_cuda.c │ │ │ ├── roi_pooling_cuda.h │ │ │ ├── roi_pooling_kernel.cu │ │ │ └── roi_pooling_kernel.h │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_layer_region.py │ │ ├── proposal_target_layer_cascade.py │ │ ├── proposal_target_layer_cascade_region.py │ │ ├── rpn.py │ │ └── rpn_region.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── config.py │ │ └── net_utils.py ├── pycocotools │ ├── UPSTREAM_REV │ ├── __init__.py │ ├── _mask.c │ ├── 
_mask.pyx │ ├── coco.py │ ├── cocoeval.py │ ├── license.txt │ ├── mask.py │ ├── maskApi.c │ └── maskApi.h ├── roi_data_layer │ ├── __init__.py │ ├── minibatch.py │ ├── roibatchLoader.py │ └── roidb.py └── setup.py ├── run ├── finetune_coco.sh ├── finetune_coco_TFA.sh ├── finetune_voc_first.sh ├── finetune_voc_second.sh ├── finetune_voc_third.sh ├── test_coco.sh ├── test_voc_first.sh ├── test_voc_second.sh ├── test_voc_third.sh ├── train_coco.sh ├── train_voc_first.sh ├── train_voc_second.sh └── train_voc_third.sh ├── spec-file.txt ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | pip-wheel-metadata/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | db.sqlite3-journal 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # IPython 80 | profile_default/ 81 | ipython_config.py 82 | 83 | # pyenv 84 | .python-version 85 | 86 | # pipenv 87 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 88 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 89 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 90 | # install all needed dependencies. 91 | #Pipfile.lock 92 | 93 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 94 | __pypackages__/ 95 | 96 | # Celery stuff 97 | celerybeat-schedule 98 | celerybeat.pid 99 | 100 | # SageMath parsed files 101 | *.sage.py 102 | 103 | # Environments 104 | .env 105 | .venv 106 | env/ 107 | venv/ 108 | ENV/ 109 | env.bak/ 110 | venv.bak/ 111 | 112 | # Spyder project settings 113 | .spyderproject 114 | .spyproject 115 | 116 | # Rope project settings 117 | .ropeproject 118 | 119 | # mkdocs documentation 120 | /site 121 | 122 | # mypy 123 | .mypy_cache/ 124 | .dmypy.json 125 | dmypy.json 126 | 127 | # Pyre type checker 128 | .pyre/ 129 | 130 | .idea/ 131 | results/ 132 | checkpoint/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Yang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | import os 4 | 5 | if os.path.exists('data/cache/') and os.listdir('data/cache/'): 6 | os.system('rm data/cache/*') 7 | 8 | def add_path(path): 9 | if path not in sys.path: 10 | sys.path.insert(0, path) 11 | 12 | this_dir = osp.dirname(__file__) 13 | 14 | # Add lib to PYTHONPATH 15 | lib_path = osp.join(this_dir, 'lib') 16 | add_path(lib_path) 17 | 18 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI') 19 | add_path(coco_path) 20 | 21 | vg_path = osp.join(this_dir, 'data', 'vgapi') 22 | add_path(vg_path) 23 | -------------------------------------------------------------------------------- /cfgs/res101_ms.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res101 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | MAX_SIZE: 1000 13 | SCALES: [600] 14 | DOUBLE_BIAS: False 15 | RCNN_BBOX_WEIGHT: 1 16 | TEST: 17 | SCALES: [600] 18 | HAS_RPN: True 19 | POOLING_SIZE: 7 20 | POOLING_MODE: align 21 | CROP_RESIZE_WITH_MAX_POOL: False 22 | -------------------------------------------------------------------------------- /cfgs/res50.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: res50 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_BATCHSIZE: 256 7 | PROPOSAL_METHOD: gt 8 | BG_THRESH_LO: 0.0 9 | DISPLAY: 20 10 | BATCH_SIZE: 128 11 | WEIGHT_DECAY: 0.0001 12 | MAX_SIZE: 1000 13 | SCALES: [600] 14 | DOUBLE_BIAS: False 15 | RCNN_BBOX_WEIGHT: 1 16 | TEST: 17 | SCALES: [600] 18 | HAS_RPN: True 19 | POOLING_SIZE: 7 20 | POOLING_MODE: align 21 | CROP_RESIZE_WITH_MAX_POOL: False 22 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # Few-Shot Object Detection (Data Preparation) 2 | 3 | First, go to the data directory: ``cd ./data`` 4 | 5 | ## PASCAL VOC 6 | 7 | Download the data from the official website: 8 | ```bash 9 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar 10 | tar xvf VOCtrainval_06-Nov-2007.tar && rm VOCtrainval_06-Nov-2007.tar 11 | 12 | wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar 13 | tar xvf VOCtest_06-Nov-2007.tar && rm VOCtest_06-Nov-2007.tar 14 | 15 | wget host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar 16 | tar xvf VOCtrainval_11-May-2012.tar && rm VOCtrainval_11-May-2012.tar 17 | ``` 18 | 19 | The data structure should look like: 20 | ``` 21 | data/VOCdevkit 22 | VOC{2007,2012}/ 23 | Annotations/ 24 | ImageSets/ 25 | JPEGImages/ 26 | ...
27 | ``` 28 | 29 | Move the dataset split files to the correct path: 30 | ```bash 31 | mv VOCsplits/VOC2007/* VOCdevkit/VOC2007/ImageSets/Main/ 32 | mv VOCsplits/VOC2012/* VOCdevkit/VOC2012/ImageSets/Main/ 33 | ``` 34 | 35 | 36 | ## COCO 37 | 38 | Download the data from the official website: 39 | ```bash 40 | mkdir coco 41 | 42 | # download images 43 | cd coco 44 | mkdir images 45 | 46 | wget images.cocodataset.org/zips/train2014.zip 47 | unzip train2014.zip && rm train2014.zip 48 | mv train2014 images/ 49 | 50 | wget images.cocodataset.org/zips/val2014.zip 51 | unzip val2014.zip && rm val2014.zip 52 | mv val2014 images/ 53 | 54 | # download annotations 55 | cd .. 56 | wget images.cocodataset.org/annotations/annotations_trainval2014.zip 57 | unzip annotations_trainval2014.zip && rm annotations_trainval2014.zip 58 | ``` 59 | 60 | The data structure should look like: 61 | ``` 62 | data/coco 63 | annotations/ 64 | images/ 65 | train2014/ 66 | val2014/ 67 | ``` 68 | 69 | 70 | 71 | ## Custom Dataset 72 | 73 | To experiment with your own dataset, first save the dataset (or create a soft link to it) in the folder ```ProjectRootDir/data```. 74 | Moreover, you should have a **csv-format** file containing all the annotations, such as: 75 | ``` 76 | set,im_path,cls,difficult,left,upper,right,lower,height,width 77 | train,ImagePathInDataset,cat,False,x1,y1,x2,y2,H,W 78 | train,ImagePathInDataset,dog,False,x1,y1,x2,y2,H,W 79 | val,ImagePathInDataset,cat,False,x1,y1,x2,y2,H,W 80 | val,ImagePathInDataset,dog,False,x1,y1,x2,y2,H,W 81 | ... 82 | ``` 83 | 84 | Then, look into [factory.py](https://github.com/YoungXIAO13/FewShotDetection/blob/4e8d0c4a0352133113b8438a6a5fa8195661c6c0/lib/datasets/factory.py#L21) to correctly set up the dataset root path and split names (train, val, etc.).\ 85 | Also, set up the novel classes for your custom dataset in [config.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/lib/model/utils/config.py#L27) 86 | 87 | Once this is done, look further into \ 88 | [custom.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom.py) for dataset creation;\ 89 | [custom_metadata.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom_metadata.py) for few-shot class data creation;\ 90 | [custom_eval.py](https://github.com/YoungXIAO13/FewShotDetection/blob/master/lib/datasets/custom_eval.py) for evaluation.
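Before wiring the dataset into the code, it can help to sanity-check the annotation file. The snippet below is a minimal sketch (the `data/MyDataset/custom_dataset.txt` path is a placeholder for your own csv): it verifies that the columns expected by `lib/datasets/custom.py` are present and flags boxes that extend beyond the image (the loaders clip such boxes at load time rather than reject them):

```python
import pandas as pd

# placeholder path -- point this at your own annotation csv
df = pd.read_csv("data/MyDataset/custom_dataset.txt")

# columns expected by lib/datasets/custom.py
expected = {"set", "im_path", "cls", "difficult",
            "left", "upper", "right", "lower", "height", "width"}
missing = expected - set(df.columns)
assert not missing, "missing columns: {}".format(missing)

# flag boxes that fall outside the image; custom.py clips these at load time
out_of_bounds = df[(df.left < 0) | (df.upper < 0) |
                   (df.right > df.width - 1) | (df.lower > df.height - 1)]
print("{} annotations will be clipped".format(len(out_of_bounds)))
```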
91 | 92 | Finally, you should set the correct values for the custom dataset in [train.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/train.py#L198) and [test.py](https://github.com/YoungXIAO13/FewShotDetection/blob/1a77fbd81fb2f319e517c70faf5b9d7eca6b0546/test.py#L129) 93 | 94 | 95 | -------------------------------------------------------------------------------- /download_models.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | wget https://www.dropbox.com/s/gemt3cxgr9b1aht/FewShotDetectionBaseModels.zip?dl=0 && mv FewShotDetectionBaseModels.zip?dl=0 FewShotDetectionBaseModels.zip 4 | 5 | unzip FewShotDetectionBaseModels.zip && rm FewShotDetectionBaseModels.zip 6 | -------------------------------------------------------------------------------- /img/PipelineDet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/img/PipelineDet.png -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 | 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ...
54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- -------------------------------------------------------------------------------- /lib/datasets/custom.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import scipy.sparse 4 | import scipy.io as sio 5 | import pickle 6 | import pandas as pd 7 | 8 | import datasets 9 | import datasets.custom 10 | from .custom_eval import custom_eval 11 | from datasets.imdb import imdb 12 | from model.utils.config import cfg 13 | 14 | 15 | class custom(imdb): 16 | def __init__(self, image_set, data_path, csv_file='custom_dataset.txt'): 17 | imdb.__init__(self, 'custom_{}'.format(image_set)) 18 | self._image_set = image_set 19 | self._data_path = data_path 20 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 21 | 22 | df = pd.read_csv(os.path.join(data_path, csv_file)) 23 | 24 | self.df = df[df.set == self._image_set] 25 | 26 | # Add novel classes after base classes [base / novel] 27 | self._classes = tuple(['__background__'] + 28 | [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] + 29 | [c for c in np.unique(df.cls).tolist() if c in cfg.CUSTOM_NOVEL_CLASSES]) 30 | self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) 31 | 32 | # The image index is set to be the unique image path in the dataset 33 | self._image_index = np.unique(self.df.im_path).tolist() 34 | 35 | # Default to roidb handler 36 | self._roidb_handler = self.gt_roidb 37 | self._comp_id = 'comp4' 38 | 39 | # Specific config options 40 | self.config = {'cleanup': False} 41 | 42 | def image_id_at(self, i): 43 | """ 44 | Return the absolute path to image i in the image sequence. 45 | """ 46 | return i 47 | 48 | def image_path_at(self, i): 49 | """ 50 | Return the absolute path to image i in the image sequence. 51 | """ 52 | return self.image_path_from_index(self._image_index[i]) 53 | 54 | def image_path_from_index(self, index): 55 | """ 56 | Construct an image path from the image's "index" identifier. 57 | """ 58 | image_path = os.path.join(self._data_path, index) 59 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 60 | return image_path 61 | 62 | def gt_roidb(self): 63 | """ 64 | Return the database of ground-truth regions of interest. 65 | This function loads/saves from/to a cache file to speed up future calls. 
66 | """ 67 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 68 | if os.path.exists(cache_file): 69 | with open(cache_file, 'rb') as fid: 70 | roidb = pickle.load(fid) 71 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 72 | return roidb 73 | 74 | gt_roidb = [self._load_annotation(index) for index in self.image_index] 75 | with open(cache_file, 'wb') as fid: 76 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 77 | print('wrote gt roidb to {}'.format(cache_file)) 78 | 79 | return gt_roidb 80 | 81 | def _load_annotation(self, index): 82 | """ 83 | Load image and bounding boxes info from txt files of pascal3d. 84 | """ 85 | 86 | objs = self.df[self.df.im_path == index] 87 | num_objs = len(objs) 88 | 89 | # original annotation for object detection 90 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 91 | gt_classes = np.zeros(num_objs, dtype=np.int32) 92 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 93 | 94 | # "Seg" area for pascal is just the box area 95 | seg_areas = np.zeros((num_objs), dtype=np.float32) 96 | ishards = np.zeros((num_objs), dtype=np.int32) 97 | 98 | # Load object annotation into a data frame. 99 | for ix in range(num_objs): 100 | x1 = max(float(objs.iloc[ix]['left']), 0) 101 | y1 = max(float(objs.iloc[ix]['upper']), 0) 102 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 103 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 104 | cls = self._class_to_ind[objs.iloc[ix]['cls']] 105 | boxes[ix, :] = [x1, y1, x2, y2] 106 | gt_classes[ix] = cls 107 | overlaps[ix, cls] = 1.0 108 | 109 | ishards[ix] = objs.iloc[ix]['difficult'] 110 | if cls not in self._classes: 111 | continue 112 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 113 | 114 | overlaps = scipy.sparse.csr_matrix(overlaps) 115 | 116 | return {'boxes': boxes, 117 | 'gt_classes': gt_classes, 118 | 'gt_ishard': ishards, 119 | 'gt_overlaps': overlaps, 120 | 'flipped': False, 121 | 'seg_areas': seg_areas} 122 | 123 | def _get_results_file_template(self): 124 | # data_path/results/_det_test_aeroplane.txt 125 | filename = self._comp_id + '_det_' + self._image_set + '_{:s}.txt' 126 | filedir = os.path.join(self._data_path, 'results') 127 | if not os.path.exists(filedir): 128 | os.makedirs(filedir) 129 | path = os.path.join(filedir, filename) 130 | return path 131 | 132 | def _write_results_file(self, all_boxes): 133 | for cls_ind, cls in enumerate(self.classes): 134 | if cls == '__background__': 135 | continue 136 | print('Writing {} results file'.format(cls)) 137 | filename = self._get_results_file_template().format(cls) 138 | with open(filename, 'wt') as f: 139 | for im_ind, index in enumerate(self.image_index): 140 | dets = all_boxes[cls_ind][im_ind] 141 | if dets == []: 142 | continue 143 | for k in range(dets.shape[0]): 144 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 
145 | format(index, dets[k, -1], 146 | dets[k, 0] + 1, dets[k, 1] + 1, 147 | dets[k, 2] + 1, dets[k, 3] + 1)) 148 | 149 | def _do_python_eval(self, output_dir='output'): 150 | cachedir = os.path.join(self._data_path, 'annotations_cache') 151 | if not os.path.isdir(output_dir): 152 | os.mkdir(output_dir) 153 | 154 | aps = [] 155 | for i, cls in enumerate(self._classes): 156 | if cls == '__background__': 157 | continue 158 | filename = self._get_results_file_template().format(cls) 159 | ap = custom_eval(filename, self.df, self._image_set, cls, cachedir, ovthresh=0.5) 160 | print('AP for {} = {:.3f}'.format(cls, ap)) 161 | 162 | aps.append(ap) 163 | # index 0 is '__background__': the last base class sits at index num_classes - len(novel) - 1, the last novel class at num_classes - 1 164 | if i == self.num_classes - len(cfg.CUSTOM_NOVEL_CLASSES) - 1: 165 | print('Mean AP = {:.4f} for base'.format(np.mean(aps))) 166 | if i == self.num_classes - 1: 167 | print('Mean AP = {:.4f} for novel'.format(np.mean(aps[-len(cfg.CUSTOM_NOVEL_CLASSES):]))) 168 | 169 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 170 | return np.mean(aps) 171 | 172 | def evaluate_detections(self, all_boxes, output_dir, **kwargs): 173 | self._write_results_file(all_boxes) 174 | AP = self._do_python_eval(output_dir) 175 | if self.config['cleanup']: 176 | for cls in self._classes: 177 | if cls == '__background__': 178 | continue 179 | filename = self._get_results_file_template().format(cls) 180 | os.remove(filename) 181 | return AP -------------------------------------------------------------------------------- /lib/datasets/custom_eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import numpy as np 4 | from math import radians 5 | 6 | 7 | def parse_rec(df, filename): 8 | """ Parse the annotations of one image from the data frame """ 9 | objects = [] 10 | objs = df[df.im_path == filename] 11 | for ix in range(len(objs)): 12 | obj_struct = {} 13 | obj_struct['class'] = objs.iloc[ix]['cls'] 14 | # clip x coordinates by width and y coordinates by height, matching custom.py 15 | x1 = max(int(objs.iloc[ix]['left']), 0) 16 | y1 = max(int(objs.iloc[ix]['upper']), 0) 17 | x2 = min(int(objs.iloc[ix]['right']), int(objs.iloc[ix]['width'] - 1)) 18 | y2 = min(int(objs.iloc[ix]['lower']), int(objs.iloc[ix]['height'] - 1)) 19 | obj_struct['bbox'] = [x1, y1, x2, y2] 20 | 21 | obj_struct['difficult'] = objs.iloc[ix]['difficult'] 22 | objects.append(obj_struct) 23 | 24 | return objects 25 | 26 | 27 | def voc_ap(rec, prec): 28 | """ 29 | Compute VOC-like AP given precision and recall. 30 | """ 31 | # correct AP calculation 32 | # first append sentinel values at the end 33 | mrec = np.concatenate(([0.], rec, [1.])) 34 | mpre = np.concatenate(([0.], prec, [0.])) 35 | 36 | # compute the precision envelope 37 | for i in range(mpre.size - 1, 0, -1): 38 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 39 | 40 | # to calculate area under PR curve, look for points 41 | # where X axis (recall) changes value 42 | i = np.where(mrec[1:] != mrec[:-1])[0] 43 | 44 | # and sum (\Delta recall) * prec 45 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 46 | return ap 47 | 48 | 49 | def custom_eval(detpath, df, subset, classname, cachedir, ovthresh=0.5): 50 | """ 51 | Top level function that does the evaluation. 52 | detpath: Path to detections; detpath.format(classname) should produce the detection results file.
53 | df: Data frame read from the annotation csv file for the subset [train / val / test] 54 | classname: Class name 55 | cachedir: Directory for caching the annotations 56 | [ovthresh]: Overlap threshold (default = 0.5) 57 | """ 58 | # assumes detections are in detpath.format(classname) 59 | # cachedir caches the annotations in a pickle file 60 | 61 | # first load gt 62 | df = df[df.set == subset] 63 | if not os.path.isdir(cachedir): 64 | os.mkdir(cachedir) 65 | cachefile = os.path.join(cachedir, '{}_annots.pkl'.format(subset)) 66 | 67 | # read list of images 68 | imagenames = np.unique(df.im_path).tolist() 69 | 70 | if not os.path.isfile(cachefile): 71 | # load annotations 72 | recs = {} 73 | for i, imagename in enumerate(imagenames): 74 | recs[imagename] = parse_rec(df, imagename) 75 | if i % 100 == 0: 76 | print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames))) 77 | # save 78 | print('Saving cached annotations to {:s}'.format(cachefile)) 79 | with open(cachefile, 'wb') as f: 80 | pickle.dump(recs, f) 81 | else: 82 | # load 83 | with open(cachefile, 'rb') as f: 84 | try: 85 | recs = pickle.load(f) 86 | except: 87 | recs = pickle.load(f, encoding='bytes') 88 | 89 | # extract gt objects for this class 90 | class_recs = {} 91 | npos = 0 92 | for imagename in imagenames: 93 | R = [obj for obj in recs[imagename] if obj['class'] == classname] 94 | bbox = np.array([x['bbox'] for x in R]) 95 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 96 | 97 | det = [False] * len(R) 98 | npos = npos + sum(~difficult) 99 | class_recs[imagename] = {'bbox': bbox, 100 | 'difficult': difficult, 101 | 'det': det} 102 | 103 | # read dets 104 | detfile = detpath.format(classname) 105 | with open(detfile, 'r') as f: 106 | lines = f.readlines() 107 | 108 | splitlines = [x.strip().split(' ') for x in lines] 109 | image_ids = [x[0] for x in splitlines] 110 | confidence = np.array([float(x[1]) for x in splitlines]) 111 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 112 | 113 | nd = len(image_ids) 114 | tp = np.zeros(nd) 115 | fp = np.zeros(nd) 116 | 117 | if BB.shape[0] > 0: 118 | # sort by descending confidence 119 | sorted_ind = np.argsort(-confidence) 120 | BB = BB[sorted_ind, :] 121 | image_ids = [image_ids[x] for x in sorted_ind] 122 | 123 | # iterate over detections 124 | for d in range(nd): 125 | R = class_recs[image_ids[d]] 126 | bb = BB[d, :].astype(float) 127 | 128 | ovmax = -np.inf 129 | BBGT = R['bbox'].astype(float) 130 | 131 | if BBGT.size > 0: 132 | # intersection 133 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 134 | iymin = np.maximum(BBGT[:, 1], bb[1]) 135 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 136 | iymax = np.minimum(BBGT[:, 3], bb[3]) 137 | iw = np.maximum(ixmax - ixmin + 1., 0.) 138 | ih = np.maximum(iymax - iymin + 1., 0.) 139 | inters = iw * ih 140 | 141 | # union 142 | uni = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 143 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters 144 | 145 | overlaps = inters / uni 146 | ovmax = np.max(overlaps) 147 | jmax = np.argmax(overlaps) 148 | 149 | # if the detection is correct 150 | if ovmax > ovthresh: 151 | if R['difficult'][jmax]: 152 | continue 153 | if not R['det'][jmax]: 154 | R['det'][jmax] = 1 155 | tp[d] = 1. 156 | else: 157 | fp[d] = 1. 158 | 159 | else: 160 | fp[d] = 1.
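    # Illustrative walk-through of the bookkeeping above and the AP computation
    # below (made-up numbers): with npos = 2 and confidence-sorted detections
    # giving tp = [1, 0, 1] and fp = [0, 1, 0], np.cumsum yields tp = [1, 1, 2]
    # and fp = [0, 1, 1], hence rec = [0.5, 0.5, 1.0] and prec = [1.0, 0.5, 0.67];
    # voc_ap then integrates the precision envelope over the recall points where
    # recall changes: 0.5 * 1.0 + 0.5 * 2/3 = 0.833.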
161 | 162 | # compute metrics AP 163 | fp = np.cumsum(fp) 164 | tp = np.cumsum(tp) 165 | rec = tp / float(npos) 166 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 167 | ap = voc_ap(rec, prec) 168 | 169 | return ap -------------------------------------------------------------------------------- /lib/datasets/custom_metadata.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import numpy as np 3 | import pandas as pd 4 | import cv2 5 | import collections 6 | import random 7 | import time 8 | 9 | import torch 10 | import torch.utils.data as data 11 | 12 | from model.utils.config import cfg 13 | import datasets 14 | from datasets.imdb import imdb 15 | import datasets.custom 16 | 17 | 18 | class MetaDatasetCustom(data.Dataset): 19 | def __init__(self, data_path, csv_file, img_size, image_set='train', shots=10, phase=1, shuffle=False): 20 | 21 | self.shuffle = shuffle 22 | self.img_size = img_size 23 | self.phase = phase 24 | self.shot_path = os.path.join(data_path, 'Shots.txt') 25 | self.shots = shots 26 | if phase == 2: 27 | self.shots = shots * 3 28 | 29 | self._data_path = data_path 30 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 31 | 32 | df = pd.read_csv(os.path.join(data_path, csv_file)) 33 | 34 | self.df = df[df.set == image_set] 35 | 36 | self._classes = tuple(['__background__'] + 37 | [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] + 38 | [c for c in np.unique(df.cls).tolist() if c in cfg.CUSTOM_NOVEL_CLASSES]) 39 | self.num_classes = len(self._classes) 40 | 41 | if phase == 1: 42 | self.metaclass = [c for c in np.unique(df.cls).tolist() if c not in cfg.CUSTOM_NOVEL_CLASSES] 43 | else: 44 | self.metaclass = self._classes[1:] 45 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 46 | 47 | self._image_index = np.unique(self.df.im_path).tolist() 48 | 49 | self.prndata = [] 50 | self.prncls = [] 51 | 52 | # Used for reproduce results with the same few-shot samples 53 | prn_image_pth = os.path.join(data_path, 'prn_image_{}shots.pt'.format(shots)) 54 | prn_mask_pth = os.path.join(data_path, 'prn_mask_{}shots.pt'.format(shots)) 55 | 56 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 57 | prn_image = torch.load(prn_image_pth) 58 | prn_mask = torch.load(prn_mask_pth) 59 | else: 60 | prn_image, prn_mask = self.get_prndata() 61 | 62 | torch.save(prn_image, prn_image_pth) 63 | torch.save(prn_mask, prn_mask_pth) 64 | 65 | for i in range(shots): 66 | cls = [] 67 | data = [] 68 | for n, key in enumerate(list(prn_image.keys())): 69 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 70 | img = img.unsqueeze(0) 71 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 72 | mask = mask.unsqueeze(0) 73 | mask = mask.unsqueeze(3) 74 | imgmask = torch.cat([img, mask], dim=3) 75 | cls.append(class_to_idx[key]) 76 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 77 | self.prncls.append(cls) 78 | self.prndata.append(torch.cat(data, dim=0)) 79 | 80 | def __getitem__(self, index): 81 | return self.prndata[index], self.prncls[index] 82 | 83 | def __len__(self): 84 | return len(self.prndata) 85 | 86 | def image_id_at(self, i): 87 | """ 88 | Return the absolute path to image i in the image sequence. 89 | """ 90 | return i 91 | 92 | def image_path_at(self, i): 93 | """ 94 | Return the absolute path to image i in the image sequence. 
95 | """ 96 | return self.image_path_from_index(self._image_index[i]) 97 | 98 | def image_path_from_index(self, index): 99 | """ 100 | Construct an image path from the image's "index" identifier. 101 | """ 102 | image_path = os.path.join(self._data_path, index) 103 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 104 | return image_path 105 | 106 | def get_prndata(self): 107 | print('\ngenerating {} shot samples...'.format(self.shots)) 108 | start = time.time() 109 | 110 | if self.shuffle: 111 | random.shuffle(self._image_index) 112 | prn_image = collections.defaultdict(list) 113 | prn_mask = collections.defaultdict(list) 114 | classes = collections.defaultdict(int) 115 | valid_annot = [] 116 | 117 | for cls in self.metaclass: 118 | classes[cls] = 0 119 | 120 | for index in self._image_index: 121 | 122 | objs = self.df[self.df.im_path == index] 123 | num_objs = len(objs) 124 | 125 | img = cv2.imread(self.image_path_from_index(index), cv2.IMREAD_COLOR) 126 | 127 | # tile channels for 1-channel images 128 | if len(img.shape) == 2: 129 | img = img[:, :, np.newaxis] 130 | img = np.concatenate((img, img, img), axis=2) 131 | 132 | # drop the last channel for 4-channel images 133 | if img.shape[-1] == 4: 134 | img = img[:, :, :-1] 135 | 136 | img = img.astype(np.float32, copy=False) 137 | img -= cfg.PIXEL_MEANS 138 | 139 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 140 | h, w = img.shape[0], img.shape[1] 141 | y_ration = float(h) / self.img_size 142 | x_ration = float(w) / self.img_size 143 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 144 | 145 | for ix in range(num_objs): 146 | x1 = max(float(objs.iloc[ix]['left']), 0) 147 | y1 = max(float(objs.iloc[ix]['upper']), 0) 148 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 149 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 150 | cls = objs.iloc[ix]['cls'] 151 | 152 | if objs.iloc[ix]['difficult']: 153 | continue 154 | 155 | if cls not in self.metaclass: 156 | continue 157 | 158 | if classes[cls] >= self.shots: 159 | continue 160 | 161 | classes[cls] += 1 162 | 163 | x1 = int(x1 / x_ration) 164 | y1 = int(y1 / y_ration) 165 | x2 = int(x2 / x_ration) 166 | y2 = int(y2 / y_ration) 167 | mask[y1:y2, x1:x2] = 1 168 | 169 | prn_image[cls].append(img_resize) 170 | prn_mask[cls].append(mask) 171 | 172 | valid_annot.append(objs.iloc[[ix]]) 173 | 174 | if len(classes) > 0 and min(classes.values()) == self.shots: 175 | break 176 | 177 | end = time.time() 178 | print('few-shot samples generated in {} s\n'.format(end - start)) 179 | 180 | # save filtered csv file 181 | valid_annot = pd.concat(valid_annot) 182 | valid_annot.to_csv(self.shot_path, index=False, header=True) 183 | 184 | return prn_image, prn_mask 185 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 
| hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h >= min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | 15 | from datasets.coco import coco 16 | from datasets.pascal_voc import pascal_voc 17 | from datasets.objectnet3d import objectnet3d 18 | from datasets.custom import custom 19 | 20 | 21 | # Set up Custom Dataset (the default lambda arguments below bind the current loop values, so each registered entry keeps its own split/path/csv) 22 | for split in ['train', 'val', 'shots']: 23 | name = 'custom_{}'.format(split) 24 | data_path = 'Your_Dataset_Root_Path' 25 | csv_file = 'Custom.txt' 26 | if split == 'shots': 27 | split = 'train' 28 | csv_file = 'Shots.txt' 29 | __sets[name] = (lambda split=split, data_path=data_path, csv_file=csv_file: custom(split, data_path, csv_file)) 30 | 31 | 32 | # Set up objectnet3d_<split> 33 | for split in ['train', 'val', 'test', 'shots']: 34 | name = 'objectnet3d_{}'.format(split) 35 | data_path = '/home/xiao/Datasets/ObjectNet3D' 36 | csv_file = 'ObjectNet3D_new.txt' 37 | if split == 'shots': 38 | split = 'train' 39 | csv_file = 'Shots.txt' 40 | __sets[name] = (lambda split=split, data_path=data_path, csv_file=csv_file: objectnet3d(split, data_path, csv_file)) 41 | 42 | 43 | # Set up voc_<year>_<split> 44 | for year in ['2007', '2012']: 45 | for split in ['train', 'val', 'trainval', 'test', 'shots', 46 | 'train_first_split', 'train_second_split', 'train_third_split']: 47 | name = 'voc_{}_{}'.format(year, split) 48 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 49 | 50 | for year in ['2014']: 51 | for split in ['train', 'val', 'minival', 'valminusminival', 'trainval', 'shots', 52 | 'TFA1shot', 'TFA2shot', 'TFA3shot', 'TFA5shot', 'TFA10shot', 'TFA30shot']: 53 | name = 'coco_{}_{}'.format(year, split) 54 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 55 | 56 | for year in ['2017']: 57 | for split in ['train', 'val']: 58 | name = 'coco_{}_{}'.format(year, split) 59 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 60 | 61 | 62 | def
get_imdb(name): 63 | """Get an imdb (image database) by name.""" 64 | if name not in __sets: 65 | raise KeyError('Unknown dataset: {}'.format(name)) 66 | return __sets[name]() 67 | 68 | 69 | def list_imdbs(): 70 | """List all registered imdbs.""" 71 | return list(__sets.keys()) 72 | -------------------------------------------------------------------------------- /lib/datasets/metadata.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Pytorch Meta R-CNN 3 | # Written by Anny Xu, Xiaopeng Yan, based on the code from Jianwei Yang 4 | # -------------------------------------------------------- 5 | import os 6 | import os.path 7 | import sys 8 | import torch.utils.data as data 9 | import cv2 10 | import torch 11 | import random 12 | import numpy as np 13 | if sys.version_info[0] == 2: 14 | import xml.etree.cElementTree as ET 15 | else: 16 | import xml.etree.ElementTree as ET 17 | from model.utils.config import cfg 18 | import collections 19 | 20 | 21 | class MetaDataset(data.Dataset): 22 | 23 | """Meta Dataset 24 | Arguments: 25 | root (string): filepath to VOCdevkit folder. 26 | image_set (string): imageset to use (eg. 'train', 'val') 27 | metaclass(string): the class name 28 | img_size(int) : the PRN network input size 29 | shot(int): the number of instances 30 | shuffle(bool) 31 | """ 32 | 33 | def __init__(self, root, image_sets, metaclass, img_size, shots=1, shuffle=False, phase=1): 34 | self.root = root 35 | self.image_set = image_sets 36 | self.img_size = img_size 37 | self.metaclass = metaclass 38 | self.shots = shots 39 | if phase == 2: 40 | self.shots = shots * 3 41 | self.shuffle = shuffle 42 | self._annopath = os.path.join('%s', 'Annotations', '%s.xml') 43 | self._imgpath = os.path.join('%s', 'JPEGImages', '%s.jpg') 44 | self.shot_path = open(os.path.join(self.root, 'VOC2007', 'ImageSets/Main/shots.txt'), 'w') 45 | self.ids = list() 46 | for (year, name) in image_sets: 47 | self._year = year 48 | rootpath = os.path.join(self.root, 'VOC' + year) 49 | for line in open(os.path.join(rootpath, 'ImageSets', 'Main', name + '.txt')): 50 | self.ids.append((rootpath, line.strip())) 51 | 52 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 53 | 54 | self.prndata = [] 55 | self.prncls = [] 56 | prn_image, prn_mask = self.get_prndata() 57 | for i in range(shots): 58 | cls = [] 59 | data = [] 60 | for n, key in enumerate(list(prn_image.keys())): 61 | img = torch.from_numpy(np.array(prn_image[key][i])) 62 | img = img.unsqueeze(0) 63 | mask = torch.from_numpy(np.array(prn_mask[key][i])) 64 | mask = mask.unsqueeze(0) 65 | mask = mask.unsqueeze(3) 66 | imgmask = torch.cat([img, mask], dim=3) 67 | cls.append(class_to_idx[key]) 68 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 69 | self.prncls.append(cls) 70 | self.prndata.append(torch.cat(data, dim=0)) 71 | 72 | def __getitem__(self, index): 73 | return self.prndata[index], self.prncls[index] 74 | 75 | def get_prndata(self): 76 | ''' 77 | :return: the construct prn input data 78 | :prn_image: lists of images in shape of (H, W, 3) 79 | :prn_mask: lists of masks in shape pf (H, W) 80 | ''' 81 | if self.shuffle: 82 | random.shuffle(self.ids) 83 | prn_image = collections.defaultdict(list) 84 | prn_mask = collections.defaultdict(list) 85 | classes = collections.defaultdict(int) 86 | for cls in self.metaclass: 87 | classes[cls] = 0 88 | for img_id in self.ids: 89 | target = ET.parse(self._annopath % 
img_id).getroot() 90 | img = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR) 91 | img = img.astype(np.float32, copy=False) 92 | img -= cfg.PIXEL_MEANS 93 | height, width, _ = img.shape 94 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 95 | h, w, _ = img.shape 96 | y_ration = float(h) / self.img_size 97 | x_ration = float(w) / self.img_size 98 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 99 | for obj in target.iter('object'): 100 | difficult = int(obj.find('difficult').text) == 1 101 | if difficult: 102 | continue 103 | name = obj.find('name').text.strip() 104 | if name not in self.metaclass: 105 | continue 106 | if classes[name] >= self.shots: 107 | break 108 | classes[name] += 1 109 | bbox = obj.find('bndbox') 110 | pts = ['xmin', 'ymin', 'xmax', 'ymax'] 111 | bndbox = [] 112 | for i, pt in enumerate(pts): 113 | cur_pt = int(float(bbox.find(pt).text)) - 1 114 | if i % 2 == 0: 115 | cur_pt = int(cur_pt / x_ration) 116 | bndbox.append(cur_pt) 117 | elif i % 2 == 1: 118 | cur_pt = int(cur_pt / y_ration) 119 | bndbox.append(cur_pt) 120 | mask[bndbox[1]:bndbox[3], bndbox[0]:bndbox[2]] = 1 121 | prn_image[name].append(img_resize) 122 | prn_mask[name].append(mask) 123 | self.shot_path.write(str(img_id[1])+'\n') 124 | break 125 | if len(classes) > 0 and min(classes.values()) == self.shots: 126 | break 127 | self.shot_path.close() 128 | return prn_image, prn_mask 129 | 130 | def __len__(self): 131 | return len(self.prndata) 132 | -------------------------------------------------------------------------------- /lib/datasets/metadata_3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.objectnet3d 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pickle 19 | from model.utils.config import cfg 20 | import pandas as pd 21 | 22 | import torch.utils.data as data 23 | import cv2 24 | import torch 25 | import random 26 | import collections 27 | import time 28 | 29 | 30 | class MetaDataset3D(data.Dataset): 31 | def __init__(self, data_path, csv_file, img_size, image_set='train', shots=1, phase=1, shuffle=False): 32 | 33 | self.shuffle = shuffle 34 | self.img_size = img_size 35 | self.phase = phase 36 | self.shot_path = os.path.join(data_path, 'Shots.txt') 37 | self.shots = shots 38 | if phase == 2: 39 | self.shots = shots * 3 40 | 41 | self._data_path = data_path 42 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 43 | 44 | df = pd.read_csv(os.path.join(data_path, csv_file)) 45 | 46 | if image_set != 'train': 47 | df = df[df.occluded == 0] 48 | df = df[df.truncated == 0] 49 | self.df = df[df.set == image_set] 50 | 51 | self._classes = tuple(['__background__'] + 52 | [c for c in np.unique(df.cat).tolist() if c not in cfg.NOVEL_3D_CLASSES] + 53 | [c for c in np.unique(df.cat).tolist() if c in cfg.NOVEL_3D_CLASSES]) 54 | self.num_classes = len(self._classes) 55 | 56 | if phase == 1: 57 | self.metaclass = [c for c in np.unique(df.cat).tolist() if c not 
in cfg.NOVEL_3D_CLASSES] 58 | else: 59 | self.metaclass = self._classes[1:] 60 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 61 | 62 | self._image_index = np.unique(self.df.im_path).tolist() 63 | 64 | self.prndata = [] 65 | self.prncls = [] 66 | 67 | prn_image_pth = os.path.join(data_path, 'prn_image_{}shots.pt'.format(shots)) 68 | prn_mask_pth = os.path.join(data_path, 'prn_mask_{}shots.pt'.format(shots)) 69 | 70 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 71 | prn_image = torch.load(prn_image_pth) 72 | prn_mask = torch.load(prn_mask_pth) 73 | else: 74 | prn_image, prn_mask = self.get_prndata() 75 | 76 | torch.save(prn_image, prn_image_pth) 77 | torch.save(prn_mask, prn_mask_pth) 78 | 79 | for i in range(shots): 80 | cls = [] 81 | data = [] 82 | for n, key in enumerate(list(prn_image.keys())): 83 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 84 | img = img.unsqueeze(0) 85 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 86 | mask = mask.unsqueeze(0) 87 | mask = mask.unsqueeze(3) 88 | imgmask = torch.cat([img, mask], dim=3) 89 | cls.append(class_to_idx[key]) 90 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 91 | self.prncls.append(cls) 92 | self.prndata.append(torch.cat(data, dim=0)) 93 | 94 | def __getitem__(self, index): 95 | return self.prndata[index], self.prncls[index] 96 | 97 | def __len__(self): 98 | return len(self.prndata) 99 | 100 | def image_id_at(self, i): 101 | """ 102 | Return the absolute path to image i in the image sequence. 103 | """ 104 | return i 105 | 106 | def image_path_at(self, i): 107 | """ 108 | Return the absolute path to image i in the image sequence. 109 | """ 110 | return self.image_path_from_index(self._image_index[i]) 111 | 112 | def image_path_from_index(self, index): 113 | """ 114 | Construct an image path from the image's "index" identifier. 
115 | """ 116 | image_path = os.path.join(self._data_path, index) 117 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 118 | return image_path 119 | 120 | def get_prndata(self): 121 | print('\ngenerating {} shot samples...'.format(self.shots)) 122 | start = time.time() 123 | 124 | if self.shuffle: 125 | random.shuffle(self._image_index) 126 | prn_image = collections.defaultdict(list) 127 | prn_mask = collections.defaultdict(list) 128 | classes = collections.defaultdict(int) 129 | valid_annot = [] 130 | 131 | for cls in self.metaclass: 132 | classes[cls] = 0 133 | 134 | for index in self._image_index: 135 | 136 | objs = self.df[self.df.im_path == index] 137 | num_objs = len(objs) 138 | 139 | img = cv2.imread(self.image_path_from_index(index), cv2.IMREAD_COLOR) 140 | # tile channels for 1-channel images 141 | if len(img.shape) == 2: 142 | img = img[:, :, np.newaxis] 143 | img = np.concatenate((img, img, img), axis=2) 144 | 145 | # drop the last channel for 4-channel images 146 | if img.shape[-1] == 4: 147 | img = img[:, :, :-1] 148 | 149 | img = img.astype(np.float32, copy=False) 150 | img -= cfg.PIXEL_MEANS 151 | 152 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 153 | h, w = img.shape[0], img.shape[1] 154 | y_ration = float(h) / self.img_size 155 | x_ration = float(w) / self.img_size 156 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 157 | 158 | for ix in range(num_objs): 159 | x1 = max(float(objs.iloc[ix]['left']), 0) 160 | y1 = max(float(objs.iloc[ix]['upper']), 0) 161 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 162 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 163 | cls = objs.iloc[ix]['cat'] 164 | 165 | if objs.iloc[ix]['difficult'] or objs.iloc[ix]['occluded'] or objs.iloc[ix]['truncated']: 166 | continue 167 | 168 | if cls not in self.metaclass: 169 | continue 170 | 171 | if classes[cls] >= self.shots: 172 | break 173 | 174 | classes[cls] += 1 175 | 176 | x1 = int(x1 / x_ration) 177 | y1 = int(y1 / y_ration) 178 | x2 = int(x2 / x_ration) 179 | y2 = int(y2 / y_ration) 180 | mask[y1:y2, x1:x2] = 1 181 | 182 | prn_image[cls].append(img_resize) 183 | prn_mask[cls].append(mask) 184 | 185 | valid_annot.append(objs.iloc[[ix]]) 186 | break 187 | 188 | if len(classes) > 0 and min(classes.values()) == self.shots: 189 | break 190 | 191 | end = time.time() 192 | print('few-shot samples generated in {} s\n'.format(end - start)) 193 | 194 | # save filtered csv file 195 | valid_annot = pd.concat(valid_annot) 196 | valid_annot.to_csv(self.shot_path, index=False, header=True) 197 | 198 | return prn_image, prn_mask 199 | -------------------------------------------------------------------------------- /lib/datasets/metadata_TFA.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import cv2 3 | import torch 4 | import collections 5 | import time 6 | import os 7 | import numpy as np 8 | import json 9 | import os.path as osp 10 | 11 | from model.utils.config import cfg 12 | from pycocotools.coco import COCO 13 | 14 | 15 | class MetaDatasetTFA(data.Dataset): 16 | def __init__(self, root, image_set, year, img_size, shots=10): 17 | self.img_size = img_size 18 | self.TFA_split = os.path.join(root, 'annotations', 'TFA', 'cocosplit') 19 | self.shot_path = os.path.join(root, 'annotations', 'instances_TFA{}shot2014.json'.format(shots)) 20 | self.shots = shots 21 | 22 | # name, paths 23 | 
self._year = year 24 | self._image_set = image_set 25 | self._data_path = root 26 | 27 | # load COCO API, classes, class <-> id mappings 28 | self._COCO = COCO(self._get_ann_file()) 29 | self.json_data = self._COCO.dataset.copy() 30 | cats = self._COCO.loadCats(self._COCO.getCatIds()) 31 | 32 | self._classes = tuple(['__background__'] + 33 | [c['name'] for c in cats if c['name'] not in cfg.VOC_CLASSES] + 34 | [c['name'] for c in cats if c['name'] in cfg.VOC_CLASSES]) 35 | 36 | self._class_to_coco_cat_id = dict(list(zip([c['name'] for c in cats], self._COCO.getCatIds()))) 37 | 38 | self.metaclass = tuple([c['name'] for c in cats if c['name'] not in cfg.VOC_CLASSES] + 39 | [c['name'] for c in cats if c['name'] in cfg.VOC_CLASSES]) 40 | class_to_idx = dict(zip(self.metaclass, range(len(self.metaclass)))) # class to index mapping 41 | 42 | self.prndata = [] 43 | self.prncls = [] 44 | 45 | prn_image_pth = os.path.join(root, 'annotations', 'TFA', 'prn_image_{}shots.pt'.format(shots)) 46 | prn_mask_pth = os.path.join(root, 'annotations', 'TFA', 'prn_mask_{}shots.pt'.format(shots)) 47 | 48 | if os.path.exists(prn_image_pth) and os.path.exists(prn_mask_pth): 49 | prn_image = torch.load(prn_image_pth) 50 | prn_mask = torch.load(prn_mask_pth) 51 | else: 52 | prn_image, prn_mask = self.get_prndata() 53 | 54 | torch.save(prn_image, prn_image_pth) 55 | torch.save(prn_mask, prn_mask_pth) 56 | 57 | for i in range(shots): 58 | cls = [] 59 | data = [] 60 | for n, key in enumerate(list(prn_image.keys())): 61 | img = torch.from_numpy(np.array(prn_image[key][i % len(prn_image[key])])) 62 | img = img.unsqueeze(0) 63 | mask = torch.from_numpy(np.array(prn_mask[key][i % len(prn_mask[key])])) 64 | mask = mask.unsqueeze(0) 65 | mask = mask.unsqueeze(3) 66 | imgmask = torch.cat([img, mask], dim=3) 67 | cls.append(class_to_idx[key]) 68 | data.append(imgmask.permute(0, 3, 1, 2).contiguous()) 69 | self.prncls.append(cls) 70 | self.prndata.append(torch.cat(data, dim=0)) 71 | 72 | def __getitem__(self, index): 73 | return self.prndata[index], self.prncls[index] 74 | 75 | def __len__(self): 76 | return len(self.prndata) 77 | 78 | def _get_ann_file(self): 79 | prefix = 'instances' if self._image_set.find('test') == -1 else 'image_info' 80 | return osp.join(self._data_path, 'annotations', prefix + '_' + self._image_set + self._year + '.json') 81 | 82 | def get_prndata(self): 83 | print('\ngenerating {} shot samples...'.format(self.shots)) 84 | start = time.time() 85 | 86 | prn_image = collections.defaultdict(list) 87 | prn_mask = collections.defaultdict(list) 88 | classes = collections.defaultdict(int) 89 | 90 | sample_images = [] 91 | sample_annots = [] 92 | 93 | for cls in self.metaclass: 94 | cls_json_file = 'full_box_{}shot_{}_trainval.json'.format(self.shots, cls) 95 | samples = json.load(open(osp.join(self.TFA_split, cls_json_file), 'r')) 96 | 97 | sample_annots.extend(samples['annotations']) 98 | sample_images.extend(samples['images']) 99 | 100 | for d_annot in samples['annotations']: 101 | img_id = d_annot['image_id'] 102 | 103 | for d_img in samples['images']: 104 | if d_img['id'] == img_id: 105 | img_info = d_img 106 | break 107 | 108 | width = img_info['width'] 109 | height = img_info['height'] 110 | 111 | x1 = np.max((0, d_annot['bbox'][0])) 112 | y1 = np.max((0, d_annot['bbox'][1])) 113 | x2 = np.min((width - 1, x1 + np.max((0, d_annot['bbox'][2] - 1)))) 114 | y2 = np.min((height - 1, y1 + np.max((0, d_annot['bbox'][3] - 1)))) 115 | 116 | if d_annot['area'] > 0 and x2 >= x1 and y2 >= y1: 117 | set_name = 
img_info['file_name'].split('_')[1] 118 | img_path = osp.join(self._data_path, 'images', set_name, img_info['file_name']) 119 | img = cv2.imread(img_path, cv2.IMREAD_COLOR) 120 | img = img.astype(np.float32, copy=False) 121 | img -= cfg.PIXEL_MEANS 122 | 123 | mask = np.zeros((self.img_size, self.img_size), dtype=np.float32) 124 | h, w, _ = img.shape 125 | y_ratio = float(h) / self.img_size 126 | x_ratio = float(w) / self.img_size 127 | img_resize = cv2.resize(img, (self.img_size, self.img_size), interpolation=cv2.INTER_LINEAR) 128 | 129 | classes[cls] += 1 130 | x1_r = int(x1 / x_ratio) 131 | y1_r = int(y1 / y_ratio) 132 | x2_r = int(x2 / x_ratio) 133 | y2_r = int(y2 / y_ratio) 134 | 135 | mask[y1_r:y2_r, x1_r:x2_r] = 1 136 | 137 | prn_image[cls].append(img_resize) 138 | prn_mask[cls].append(mask) 139 | 140 | end = time.time() 141 | print('few-shot samples generated in {} s\n'.format(end - start)) 142 | print('minimum sample number among all classes is {}'.format(min(classes.values()))) 143 | 144 | self.json_data['images'] = sample_images 145 | self.json_data['annotations'] = sample_annots 146 | 147 | def convert(o): 148 | if isinstance(o, np.int64): return int(o) 149 | raise TypeError 150 | 151 | with open(self.shot_path, 'w') as f: 152 | json.dump(self.json_data, f, default=convert) 153 | 154 | return prn_image, prn_mask 155 |
-------------------------------------------------------------------------------- /lib/datasets/objectnet3d.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.objectnet3d 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pickle 19 | import pandas as pd 20 | from model.utils.config import cfg 21 | from .pascal3d_eval import pascal3d_eval 22 | 23 | try: 24 | xrange # Python 2 25 | except NameError: 26 | xrange = range # Python 3 27 | 28 | 29 | class objectnet3d(imdb): 30 | def __init__(self, image_set, data_path, csv_file): 31 | imdb.__init__(self, 'objectnet3d_{}'.format(image_set)) 32 | self._image_set = image_set 33 | self._data_path = data_path 34 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 35 | 36 | df = pd.read_csv(os.path.join(data_path, csv_file)) 37 | 38 | self.df = df[df.set == 'train'] if image_set == 'shots' else df[df.set == image_set] 39 | 40 | self._classes = tuple(['__background__'] + 41 | [c for c in np.unique(df.cat).tolist() if c not in cfg.NOVEL_3D_CLASSES] + 42 | [c for c in np.unique(df.cat).tolist() if c in cfg.NOVEL_3D_CLASSES]) 43 | print('Dataset classes: {}'.format(self._classes)) 44 | 45 | self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes))) 46 | 47 | self._image_index = np.unique(self.df.im_path).tolist() 48 | 49 | # Default to roidb handler 50 | self._roidb_handler = self.gt_roidb 51 | self._comp_id = 'comp4' 52 | 53 | # Specific config options 54 | self.config = {'cleanup': False, 55 | 'use_salt': True, 56 | 'use_diff': False, 57 | 'matlab_eval': False} 58 | 59 | def image_id_at(self, i): 60 | """ 61 | Return the id of image i in the image sequence. 62 | """ 63 | return i 64 | 65 | def image_path_at(self, i): 66 | """ 67 | Return the absolute path to image i in the image sequence. 68 | """ 69 | return self.image_path_from_index(self._image_index[i]) 70 | 71 | def image_path_from_index(self, index): 72 | """ 73 | Construct an image path from the image's "index" identifier. 74 | """ 75 | image_path = os.path.join(self._data_path, index) 76 | assert os.path.exists(image_path), 'Path does not exist: {}'.format(image_path) 77 | return image_path 78 | 79 | def gt_roidb(self): 80 | """ 81 | Return the database of ground-truth regions of interest. 82 | This function loads/saves from/to a cache file to speed up future calls. 83 | """ 84 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 85 | if os.path.exists(cache_file): 86 | with open(cache_file, 'rb') as fid: 87 | roidb = pickle.load(fid) 88 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 89 | return roidb 90 | 91 | gt_roidb = [self._load_pascal3d_annotation(index) for index in self.image_index] 92 | with open(cache_file, 'wb') as fid: 93 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 94 | print('wrote gt roidb to {}'.format(cache_file)) 95 | 96 | return gt_roidb 97 | 98 | def _load_pascal3d_annotation(self, index): 99 | """ 100 | Load image and bounding boxes info from txt files of pascal3d. 101 | """ 102 | 103 | objs = self.df[self.df.im_path == index] 104 | num_objs = len(objs) 105 | 106 | # original annotation for object detection 107 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 108 | gt_classes = np.zeros(num_objs, dtype=np.int32) 109 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 110 | 111 | # "Seg" area for pascal is just the box area 112 | seg_areas = np.zeros((num_objs), dtype=np.float32) 113 | ishards = np.zeros((num_objs), dtype=np.int32) 114 | 115 | # Load object annotation into a data frame.
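# The loop below assumes each row of the annotation CSV provides at least these
# columns (a schema sketch inferred from the accesses in this file, not an official spec):
#   im_path, set, cat, left, upper, right, lower, width, height, difficult
# e.g. a purely hypothetical row:
#   Images/aeroplane_imagenet/n02690373_16.JPEG, train, aeroplane, 60, 78, 420, 255, 480, 360, 0
# x-coordinates are clamped against the image width, y-coordinates against the height.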
116 | for ix in range(num_objs): 117 | x1 = max(float(objs.iloc[ix]['left']), 0) 118 | y1 = max(float(objs.iloc[ix]['upper']), 0) 119 | x2 = min(float(objs.iloc[ix]['right']), objs.iloc[ix]['width'] - 1) 120 | y2 = min(float(objs.iloc[ix]['lower']), objs.iloc[ix]['height'] - 1) 121 | cat = objs.iloc[ix]['cat'] 122 | if cat not in self._class_to_ind: 123 | continue # skip annotations whose category is not in the class list 124 | cls = self._class_to_ind[cat] 125 | boxes[ix, :] = [x1, y1, x2, y2] 126 | gt_classes[ix] = cls 127 | overlaps[ix, cls] = 1.0 128 | ishards[ix] = objs.iloc[ix]['difficult'] 129 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 130 | 131 | overlaps = scipy.sparse.csr_matrix(overlaps) 132 | 133 | return {'boxes': boxes, 134 | 'gt_classes': gt_classes, 135 | 'gt_ishard': ishards, 136 | 'gt_overlaps': overlaps, 137 | 'flipped': False, 138 | 'seg_areas': seg_areas} 139 | 140 | def _get_results_file_template(self): 141 | # data_path/results/_det_test_aeroplane.txt 142 | filename = self._comp_id + '_det_' + self._image_set + '_{:s}.txt' 143 | filedir = os.path.join(self._data_path, 'results') 144 | if not os.path.exists(filedir): 145 | os.makedirs(filedir) 146 | path = os.path.join(filedir, filename) 147 | return path 148 | 149 | def _write_results_file(self, all_boxes): 150 | for cls_ind, cls in enumerate(self.classes): 151 | if cls == '__background__': 152 | continue 153 | print('Writing {} ObjectNet3D results file'.format(cls)) 154 | filename = self._get_results_file_template().format(cls) 155 | with open(filename, 'wt') as f: 156 | for im_ind, index in enumerate(self.image_index): 157 | dets = all_boxes[cls_ind][im_ind] 158 | if len(dets) == 0: 159 | continue 160 | # the VOCdevkit expects 1-based indices 161 | for k in xrange(dets.shape[0]): 162 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 163 | format(index, dets[k, -1], 164 | dets[k, 0] + 1, dets[k, 1] + 1, 165 | dets[k, 2] + 1, dets[k, 3] + 1)) 166 | 167 | def _do_python_eval(self, output_dir='output'): 168 | cachedir = os.path.join(self._data_path, 'annotations_cache') 169 | if not os.path.isdir(output_dir): 170 | os.mkdir(output_dir) 171 | 172 | aps = [] 173 | for i, cls in enumerate(self._classes): 174 | if cls == '__background__': 175 | continue 176 | filename = self._get_results_file_template().format(cls) 177 | ap = pascal3d_eval(filename, self.df, self._image_set, cls, cachedir, ovthresh=0.5) 178 | print('AP for {} = {:.3f}'.format(cls, ap)) 179 | 180 | aps.append(ap) 181 | 182 | if i == 80: # the first 80 classes (after __background__) are the base classes 183 | print('Mean AP = {:.4f} for base'.format(np.mean(aps))) 184 | if i == 100: # the last 20 classes are the novel classes 185 | print('Mean AP = {:.4f} for novel'.format(np.mean(aps[-20:]))) 186 | 187 | print('Mean AP = {:.4f}'.format(np.mean(aps))) 188 | return np.mean(aps) 189 | 190 | def evaluate_detections(self, all_boxes, output_dir, **kwargs): 191 | self._write_results_file(all_boxes) 192 | AP = self._do_python_eval(output_dir) 193 | if self.config['cleanup']: 194 | for cls in self._classes: 195 | if cls == '__background__': 196 | continue 197 | filename = self._get_results_file_template().format(cls) 198 | os.remove(filename) 199 | return AP 200 |
-------------------------------------------------------------------------------- /lib/datasets/pascal3d_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 |
from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import pickle 12 | import numpy as np 13 | from math import radians 14 | 15 | 16 | def parse_rec(df, filename): 17 | """ Parse PASCAL 3D annotation file """ 18 | objects = [] 19 | objs = df[df.im_path == filename] 20 | for ix in range(len(objs)): 21 | obj_struct = {} 22 | obj_struct['class'] = objs.iloc[ix]['cat'] 23 | 24 | x1 = max(int(objs.iloc[ix]['left']), 0) 25 | y1 = max(int(objs.iloc[ix]['upper']), 0) 26 | x2 = min(int(objs.iloc[ix]['right']), int(objs.iloc[ix]['width'] - 1))  # clamp x to the image width 27 | y2 = min(int(objs.iloc[ix]['lower']), int(objs.iloc[ix]['height'] - 1))  # clamp y to the image height 28 | 29 | obj_struct['bbox'] = [x1, y1, x2, y2] 30 | 31 | obj_struct['difficult'] = objs.iloc[ix]['difficult'] 32 | obj_struct['truncated'] = objs.iloc[ix]['truncated'] 33 | obj_struct['occluded'] = objs.iloc[ix]['occluded'] 34 | objects.append(obj_struct) 35 | 36 | return objects 37 | 38 | 39 | def voc_ap(rec, prec): 40 | """ 41 | Compute VOC AP given precision and recall. 42 | """ 43 | # correct AP calculation 44 | # first append sentinel values at the end 45 | mrec = np.concatenate(([0.], rec, [1.])) 46 | mpre = np.concatenate(([0.], prec, [0.])) 47 | 48 | # compute the precision envelope 49 | for i in range(mpre.size - 1, 0, -1): 50 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 51 | 52 | # to calculate area under PR curve, look for points 53 | # where X axis (recall) changes value 54 | i = np.where(mrec[1:] != mrec[:-1])[0] 55 | 56 | # and sum (\Delta recall) * prec 57 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 58 | return ap 59 | 60 | 61 | def angles_to_matrix(angles): 62 | """Compute the rotation matrix from Euler angles in degrees""" 63 | azi = radians(angles[0]) 64 | ele = radians(angles[1]) 65 | inp = radians(angles[2]) 66 | element1 = np.cos(inp) * np.cos(azi) - np.sin(inp) * np.cos(ele) * np.sin(azi) 67 | element2 = np.sin(inp) * np.cos(azi) + np.cos(inp) * np.cos(ele) * np.sin(azi) 68 | element3 = np.sin(ele) * np.sin(azi) 69 | element4 = -np.cos(inp) * np.sin(azi) - np.sin(inp) * np.cos(ele) * np.cos(azi) 70 | element5 = -np.sin(inp) * np.sin(azi) + np.cos(inp) * np.cos(ele) * np.cos(azi) 71 | element6 = np.sin(ele) * np.cos(azi) 72 | element7 = np.sin(inp) * np.sin(ele) 73 | element8 = -np.cos(inp) * np.sin(ele) 74 | element9 = np.cos(ele) 75 | R_mat = np.array((element1, element2, element3, 76 | element4, element5, element6, 77 | element7, element8, element9)).reshape(3, 3) 78 | return R_mat 79 | 80 | 81 | def azimuth_match_interval(azi_pred, azi_gt, view=24): 82 | offset = (360 / view) / 2 83 | step = 360 / view 84 | interval_pred = int((azi_pred + offset) % 360 // step) 85 | interval_gt = int((azi_gt + offset) % 360 // step) 86 | return interval_pred == interval_gt 87 | 88 | 89 | def viewpoint_err(vp_pred, vp_gt): 90 | """ Compute the rotation matrix error between viewpoints """ 91 | R_pred = angles_to_matrix(vp_pred) 92 | R_gt = angles_to_matrix(vp_gt) 93 | R_err = np.arccos(((np.sum(R_pred * R_gt)).clip(-1., 3.) - 1.) / 2) 94 | return R_err 95 | 96 | 97 | def pascal3d_eval(detpath, df, subset, classname, cachedir, ovthresh=0.5): 98 | """ 99 | Top level function that does the PASCAL VOC evaluation. 100 | detpath: Path to detections 101 | detpath.format(classname) should produce the detection results file.
102 | df: Data frame read from the annotation csv file for the subset [train / val / test] 103 | classname: Category name (duh) 104 | cachedir: Directory for caching the annotations 105 | [ovthresh]: Overlap threshold (default = 0.5) 106 | """ 107 | # assumes detections are in detpath.format(classname) 108 | # cachedir caches the annotations in a pickle file 109 | 110 | # first load gt 111 | df = df[df.set == subset] 112 | if not os.path.isdir(cachedir): 113 | os.mkdir(cachedir) 114 | cachefile = os.path.join(cachedir, '{}_annots.pkl'.format(subset)) 115 | 116 | # read list of images 117 | imagenames = np.unique(df.im_path).tolist() 118 | 119 | if not os.path.isfile(cachefile): 120 | # load annotations 121 | recs = {} 122 | for i, imagename in enumerate(imagenames): 123 | recs[imagename] = parse_rec(df, imagename) 124 | if i % 100 == 0: 125 | print('Reading annotation for {:d}/{:d}'.format(i + 1, len(imagenames))) 126 | # save 127 | print('Saving cached annotations to {:s}'.format(cachefile)) 128 | with open(cachefile, 'wb') as f: 129 | pickle.dump(recs, f) 130 | else: 131 | # load 132 | with open(cachefile, 'rb') as f: 133 | try: 134 | recs = pickle.load(f) 135 | except: 136 | recs = pickle.load(f, encoding='bytes') 137 | 138 | # extract gt objects for this class 139 | class_recs = {} 140 | npos = 0 141 | for imagename in imagenames: 142 | R = [obj for obj in recs[imagename] if obj['class'] == classname] 143 | bbox = np.array([x['bbox'] for x in R]) 144 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 145 | 146 | det = [False] * len(R) 147 | npos = npos + sum(~difficult) 148 | class_recs[imagename] = {'bbox': bbox, 149 | 'difficult': difficult, 150 | 'det': det} 151 | 152 | # read dets 153 | detfile = detpath.format(classname) 154 | with open(detfile, 'r') as f: 155 | lines = f.readlines() 156 | 157 | splitlines = [x.strip().split(' ') for x in lines] 158 | image_ids = [x[0] for x in splitlines] 159 | confidence = np.array([float(x[1]) for x in splitlines]) 160 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 161 | 162 | nd = len(image_ids) 163 | tp = np.zeros(nd) 164 | fp = np.zeros(nd) 165 | 166 | if BB.shape[0] > 0: 167 | # sort by descending confidence 168 | sorted_ind = np.argsort(-confidence) 169 | BB = BB[sorted_ind, :] 170 | image_ids = [image_ids[x] for x in sorted_ind] 171 | 172 | # iterate for each detection 173 | for d in range(nd): 174 | R = class_recs[image_ids[d]] 175 | bb = BB[d, :].astype(float) 176 | 177 | ovmax = -np.inf 178 | BBGT = R['bbox'].astype(float) 179 | 180 | if BBGT.size > 0: 181 | # intersection 182 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 183 | iymin = np.maximum(BBGT[:, 1], bb[1]) 184 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 185 | iymax = np.minimum(BBGT[:, 3], bb[3]) 186 | iw = np.maximum(ixmax - ixmin + 1., 0.) 187 | ih = np.maximum(iymax - iymin + 1., 0.) 188 | inters = iw * ih 189 | 190 | # union 191 | uni = (bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + \ 192 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters 193 | 194 | overlaps = inters / uni 195 | ovmax = np.max(overlaps) 196 | jmax = np.argmax(overlaps) 197 | 198 | # if the detection is correct 199 | if ovmax > ovthresh: 200 | if R['difficult'][jmax]: 201 | continue 202 | if not R['det'][jmax]: 203 | R['det'][jmax] = 1 204 | tp[d] = 1. 205 | else: 206 | fp[d] = 1. 207 | 208 | else: 209 | fp[d] = 1.
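# A small worked example of the precision/recall computation below, assuming npos = 2
# and three detections already sorted by descending confidence:
#   per-detection flags: tp = [1, 0, 1], fp = [0, 1, 0]
#   after cumsum:        tp = [1, 1, 2], fp = [0, 1, 1]
#   rec  = tp / npos      = [0.5, 0.5, 1.0]
#   prec = tp / (tp + fp) = [1.0, 0.5, 0.667]
# voc_ap then integrates the precision envelope over recall to obtain AP.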
210 | 211 | # compute precision/recall and AP 212 | fp = np.cumsum(fp) 213 | tp = np.cumsum(tp) 214 | rec = tp / float(npos) 215 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 216 | ap = voc_ap(rec, prec) 217 | 218 | return ap
-------------------------------------------------------------------------------- /lib/datasets/tools/compute_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import os 4 | import sys 5 | 6 | NUM_ATTR_REL = 200 7 | def cout_w(prob, num=NUM_ATTR_REL, dim=1): 8 | prob_weight = prob[:, :num] 9 | sum_value = np.sum(prob_weight, keepdims=True, axis=dim) + 0.1 10 | prob_weight = prob_weight / np.repeat(sum_value, prob_weight.shape[dim], axis=dim) 11 | return prob_weight 12 | 13 | def cp_kl(a, b): 14 | # compute the KL divergence KL(a || b) 15 | if np.sum(a) == 0 or np.sum(b) == 0: 16 | return 1 17 | sum_ = a * np.log(a / b) 18 | # keep only the finite terms (zero-probability bins produce nan/inf) 19 | kl = np.sum(sum_[np.isfinite(sum_)]) 20 | return kl 21 | 22 | def compute_js(attr_prob): 23 | cls_num = attr_prob.shape[0] 24 | similarity = np.zeros((cls_num, cls_num)) 25 | similarity[0, 1:] = 1 26 | similarity[1:, 0] = 1 27 | for i in range(1, cls_num): 28 | if i % 50 == 0: 29 | print('processed {} classes...\n'.format(i)) 30 | for j in range(1, cls_num): 31 | if i == j: 32 | similarity[i, j] = 0 33 | else: 34 | similarity[i, j] = 0.5 * (cp_kl(attr_prob[i, :], 0.5 * (attr_prob[i, :] + attr_prob[j, :])) 35 | + cp_kl(attr_prob[j, :], 0.5 * (attr_prob[i, :] + attr_prob[j, :]))) 36 | return similarity 37 | 38 | if __name__ == '__main__': 39 | data_path = '/data/VisualGenome/graph/' 40 | dim_ = 1000 41 | ## Compute attribute knowledge by JS-divergence 42 | graph_a = pickle.load(open(data_path + 'vg_attr_frequency_1000.pkl', 'rb')) 43 | 44 | ## You can take a subset of graph_a and match its names with your dataset 45 | # We give an example of computing the graph of VisualGenome with 1000 classes 46 | # the first row of graph_a is the background class 47 | graph_a = cout_w(graph_a, num=len(graph_a)) 48 | graph_a = compute_js(graph_a) 49 | graph_a = 1 - graph_a 50 | pickle.dump(graph_a, open(data_path + 'vg_graph_a.pkl', 'wb')) 51 | 52 | ## Compute relation knowledge 53 | graph_r = pickle.load(open(data_path + 'vg_pair_frequency_1000.pkl', 'rb')) 54 | ## You can take a subset of graph_r and match its names with your dataset 55 | # We give an example of computing the graph of VisualGenome with 1000 classes 56 | relation_matrix = np.zeros((dim_, dim_)) 57 | relation_matrix = graph_r + graph_r.transpose() 58 | relation_matrix_row_sum = relation_matrix.sum(1) 59 | for i in range(dim_): 60 | relation_matrix[i, i] = relation_matrix_row_sum[i] + 1.
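# The loop below turns the symmetric co-occurrence counts into
#   prob_relation_matrix[i, j] = R[i, j] / sqrt(R[i, i] * R[j, j]),
# a cosine-style normalization: diagonal entries become 1 and off-diagonal
# entries measure how strongly classes i and j co-occur relative to their
# individual frequencies.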
61 | prob_relation_matrix = np.zeros((dim_, dim_)) 62 | for i in range(dim_): 63 | for j in range(dim_): 64 | prob_relation_matrix[i, j] = relation_matrix[i, j] / ( 65 | np.sqrt(relation_matrix[i, i]) * np.sqrt(relation_matrix[j, j])) 66 | prob_relation_matrix_ba = np.zeros((dim_ + 1, dim_ + 1)) 67 | prob_relation_matrix_ba[1:, 1:] = prob_relation_matrix 68 | print(prob_relation_matrix_ba.shape) 69 | pickle.dump(prob_relation_matrix_ba, open(data_path + 'vg_graph_r.pkl', 'wb')) 70 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def 
voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False): 76 | """rec, prec, ap = voc_eval(detpath, 77 | annopath, 78 | imagesetfile, 79 | classname, 80 | [ovthresh], 81 | [use_07_metric]) 82 | 83 | Top level function that does the PASCAL VOC evaluation. 84 | 85 | detpath: Path to detections 86 | detpath.format(classname) should produce the detection results file. 87 | annopath: Path to annotations 88 | annopath.format(imagename) should be the xml annotations file. 89 | imagesetfile: Text file containing the list of images, one image per line. 90 | classname: Category name (duh) 91 | cachedir: Directory for caching the annotations 92 | [ovthresh]: Overlap threshold (default = 0.5) 93 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 94 | (default False) 95 | """ 96 | # assumes detections are in detpath.format(classname) 97 | # assumes annotations are in annopath.format(imagename) 98 | # assumes imagesetfile is a text file with each line an image name 99 | # cachedir caches the annotations in a pickle file 100 | 101 | # first load gt 102 | if not os.path.isdir(cachedir): 103 | os.mkdir(cachedir) 104 | cachefile = os.path.join(cachedir, 'annots.pkl') 105 | 106 | # read list of images 107 | with open(imagesetfile, 'r') as f: 108 | lines = f.readlines() 109 | imagenames = [x.strip() for x in lines] 110 | 111 | if not os.path.isfile(cachefile): 112 | # load annotations 113 | recs = {} 114 | for i, imagename in enumerate(imagenames): 115 | recs[imagename] = parse_rec(annopath.format(imagename)) 116 | if i % 100 == 0: 117 | print('Reading annotation for {:d}/{:d}'.format( 118 | i + 1, len(imagenames))) 119 | # save 120 | print('Saving cached annotations to {:s}'.format(cachefile)) 121 | with open(cachefile, 'wb') as f: 122 | pickle.dump(recs, f) 123 | else: 124 | # load 125 | with open(cachefile, 'rb') as f: 126 | try: 127 | recs = pickle.load(f) 128 | except: 129 | recs = pickle.load(f, encoding='bytes') 130 | 131 | # extract gt objects for this class 132 | class_recs = {} 133 | npos = 0 134 | for imagename in imagenames: 135 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 136 | bbox = np.array([x['bbox'] for x in R]) 137 | difficult = np.array([x['difficult'] for x in R]).astype(bool) 138 | det = [False] * len(R) 139 | npos = npos + sum(~difficult) 140 | class_recs[imagename] = {'bbox': bbox, 141 | 'difficult': difficult, 142 | 'det': det} 143 | 144 | # read dets 145 |
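# Each line of the per-class detection file is expected to look like
#   <image_id> <confidence> <x1> <y1> <x2> <y2>
# e.g. "000005 0.921 12.0 45.3 310.2 222.8" (an illustrative, made-up line),
# i.e. the format produced by the _write_results_file routines in this repo.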
detfile = detpath.format(classname) 146 | with open(detfile, 'r') as f: 147 | lines = f.readlines() 148 | 149 | if len(lines) == 0: 150 | # No detections for this class 151 | return 0., 0., 0. 152 | 153 | splitlines = [x.strip().split(' ') for x in lines] 154 | image_ids = [x[0] for x in splitlines] 155 | confidence = np.array([float(x[1]) for x in splitlines]) 156 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 157 | 158 | nd = len(image_ids) 159 | tp = np.zeros(nd) 160 | fp = np.zeros(nd) 161 | 162 | if BB.shape[0] > 0: 163 | # sort by descending confidence 164 | sorted_ind = np.argsort(-confidence) 165 | sorted_scores = np.sort(-confidence) 166 | BB = BB[sorted_ind, :] 167 | image_ids = [image_ids[x] for x in sorted_ind] 168 | 169 | # go down dets and mark TPs and FPs 170 | for d in range(nd): 171 | R = class_recs[image_ids[d]] 172 | bb = BB[d, :].astype(float) 173 | ovmax = -np.inf 174 | BBGT = R['bbox'].astype(float) 175 | 176 | if BBGT.size > 0: 177 | # compute overlaps 178 | # intersection 179 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 180 | iymin = np.maximum(BBGT[:, 1], bb[1]) 181 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 182 | iymax = np.minimum(BBGT[:, 3], bb[3]) 183 | iw = np.maximum(ixmax - ixmin + 1., 0.) 184 | ih = np.maximum(iymax - iymin + 1., 0.) 185 | inters = iw * ih 186 | 187 | # union 188 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 189 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 190 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 191 | 192 | overlaps = inters / uni 193 | ovmax = np.max(overlaps) 194 | jmax = np.argmax(overlaps) 195 | 196 | if ovmax > ovthresh: 197 | if not R['difficult'][jmax]: 198 | if not R['det'][jmax]: 199 | tp[d] = 1. 200 | R['det'][jmax] = 1 201 | else: 202 | fp[d] = 1. 203 | else: 204 | fp[d] = 1. 205 | 206 | # compute precision recall 207 | fp = np.cumsum(fp) 208 | tp = np.cumsum(tp) 209 | rec = tp / float(npos) 210 | # avoid divide by zero in case the first detection matches a difficult 211 | # ground truth 212 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 213 | ap = voc_ap(rec, prec, use_07_metric) 214 | 215 | return rec, prec, ap 216 |
-------------------------------------------------------------------------------- /lib/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | export CXXFLAGS="-std=c++11" 6 | export CFLAGS="-std=c99" 7 | 8 | python setup.py build_ext --inplace 9 | rm -rf build 10 | 11 | CUDA_ARCH="-gencode arch=compute_30,code=sm_30 \ 12 | -gencode arch=compute_35,code=sm_35 \ 13 | -gencode arch=compute_50,code=sm_50 \ 14 | -gencode arch=compute_52,code=sm_52 \ 15 | -gencode arch=compute_60,code=sm_60 \ 16 | -gencode arch=compute_61,code=sm_61 " 17 | # -gencode arch=compute_70,code=sm_70 18 | 19 | # compile NMS 20 | cd model/nms/src 21 | echo "Compiling nms kernels by nvcc..." 22 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \ 23 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 24 | 25 | cd ../ 26 | python build.py 27 | 28 | # compile roi_pooling 29 | cd ../../ 30 | cd model/roi_pooling/src 31 | echo "Compiling roi pooling kernels by nvcc..." 32 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \ 33 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 34 | cd ../ 35 | python build.py 36 | 37 | # compile roi_align 38 | cd ../../ 39 | cd model/roi_align/src 40 | echo "Compiling roi align kernels by nvcc..."
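# Same pattern as the kernels above: nvcc pre-compiles the CUDA kernel into an
# object file (-x cu treats the input as CUDA, -Xcompiler -fPIC makes the object
# linkable into the Python extension that build.py assembles afterwards).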
41 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \ 42 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 43 | cd ../ 44 | python build.py 45 | 46 | # compile roi_crop 47 | cd ../../ 48 | cd model/roi_crop/src 49 | echo "Compiling roi crop kernels by nvcc..." 50 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \ 51 | -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC $CUDA_ARCH 52 | cd ../ 53 | python build.py 54 | -------------------------------------------------------------------------------- /lib/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/vgg16.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Tensorflow Faster R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Xinlei Chen 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.autograd import Variable 14 | import math 15 | import torchvision.models as models 16 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 17 | import pdb 18 | 19 | class vgg16(_fasterRCNN): 20 | def __init__(self, classes, pretrained=False, class_agnostic=False): 21 | self.model_path = 'data/pretrained_model/vgg16_caffe.pth' 22 | self.dout_base_model = 512 23 | self.pretrained = pretrained 24 | self.class_agnostic = class_agnostic 25 | 26 | _fasterRCNN.__init__(self, classes, class_agnostic) 27 | 28 | def _init_modules(self): 29 | vgg = models.vgg16() 30 | if self.pretrained: 31 | print("Loading pretrained weights from %s" %(self.model_path)) 32 | state_dict = torch.load(self.model_path) 33 | vgg.load_state_dict({k:v for k,v in state_dict.items() if k in vgg.state_dict()}) 34 | 35 | vgg.classifier = nn.Sequential(*list(vgg.classifier._modules.values())[:-1]) 36 | 37 | # not using the last maxpool layer 38 | self.RCNN_base = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 39 | 40 | # Fix the layers before conv3: 41 | for layer in range(10): 42 | for p in self.RCNN_base[layer].parameters(): p.requires_grad = False 43 | 44 | # self.RCNN_base = _RCNN_base(vgg.features, self.classes, self.dout_base_model) 45 | 46 | self.RCNN_top = vgg.classifier 47 | 48 | # not using the last maxpool layer 49 | self.RCNN_cls_score = nn.Linear(4096, self.n_classes) 50 | 51 | if self.class_agnostic: 52 | self.RCNN_bbox_pred = nn.Linear(4096, 4) 53 | else: 54 | self.RCNN_bbox_pred = nn.Linear(4096, 4 * self.n_classes) 55 | 56 | def _head_to_tail(self, pool5): 57 | 58 | pool5_flat = pool5.view(pool5.size(0), -1) 59 | fc7 = self.RCNN_top(pool5_flat) 60 | 61 | return fc7 62 | 63 | -------------------------------------------------------------------------------- /lib/model/nms/.gitignore: 
-------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/nms/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/nms_cuda.c'] 16 | headers += ['src/nms_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/nms_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | print(extra_objects) 25 | 26 | ffi = create_extension( 27 | '_ext.nms', 28 | headers=headers, 29 | sources=sources, 30 | define_macros=defines, 31 | relative_to=__file__, 32 | with_cuda=with_cuda, 33 | extra_objects=extra_objects 34 | ) 35 | 36 | if __name__ == '__main__': 37 | ffi.build() 38 | -------------------------------------------------------------------------------- /lib/model/nms/nms_cpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import torch 5 | 6 | def nms_cpu(dets, thresh): 7 | dets = dets.cpu().numpy() 8 | x1 = dets[:, 0] 9 | y1 = dets[:, 1] 10 | x2 = dets[:, 2] 11 | y2 = dets[:, 3] 12 | scores = dets[:, 4] 13 | 14 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 15 | order = scores.argsort()[::-1] 16 | 17 | keep = [] 18 | while order.size > 0: 19 | i = order.item(0) 20 | keep.append(i) 21 | xx1 = np.maximum(x1[i], x1[order[1:]]) 22 | yy1 = np.maximum(y1[i], y1[order[1:]]) 23 | xx2 = np.minimum(x2[i], x2[order[1:]]) 24 | yy2 = np.minimum(y2[i], y2[order[1:]]) 25 | 26 | w = np.maximum(0.0, xx2 - xx1 + 1) 27 | h = np.maximum(0.0, yy2 - yy1 + 1) 28 | inter = w * h 29 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 30 | 31 | inds = np.where(ovr <= thresh)[0] 32 | order = order[inds + 1] 33 | 34 | return 
torch.IntTensor(keep) 35 | 36 | 37 | 38 | 39 | def nms_cpu_np(dets, thresh): 40 | x1 = dets[:, 0] 41 | y1 = dets[:, 1] 42 | x2 = dets[:, 2] 43 | y2 = dets[:, 3] 44 | scores = dets[:, 4] 45 | 46 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 47 | order = scores.argsort()[::-1] 48 | 49 | keep = [] 50 | while order.size > 0: 51 | i = order.item(0) 52 | keep.append(i) 53 | xx1 = np.maximum(x1[i], x1[order[1:]]) 54 | yy1 = np.maximum(y1[i], y1[order[1:]]) 55 | xx2 = np.minimum(x2[i], x2[order[1:]]) 56 | yy2 = np.minimum(y2[i], y2[order[1:]]) 57 | 58 | w = np.maximum(0.0, xx2 - xx1 + 1) 59 | h = np.maximum(0.0, yy2 - yy1 + 1) 60 | inter = w * h 61 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 62 | 63 | inds = np.where(ovr <= thresh)[0] 64 | order = order[inds + 1] 65 | 66 | return keep 67 | 68 | 69 | 70 | def soft_nms_cpu(dets, threshold=0.001, Nt=0.3, sigma=0.5, method=1):  # sigma is used by the gaussian re-scoring (method == 2) 71 | boxes = dets.cpu().numpy() 72 | N = dets.shape[0] 73 | pos = 0 74 | maxscore = 0 75 | maxpos = 0 76 | 77 | for i in range(N): 78 | maxscore = boxes[i, 4] 79 | maxpos = i 80 | 81 | tx1 = boxes[i,0] 82 | ty1 = boxes[i,1] 83 | tx2 = boxes[i,2] 84 | ty2 = boxes[i,3] 85 | ts = boxes[i,4] 86 | 87 | pos = i + 1 88 | # get max box 89 | while pos < N: 90 | if maxscore < boxes[pos, 4]: 91 | maxscore = boxes[pos, 4] 92 | maxpos = pos 93 | pos = pos + 1 94 | 95 | # add max box as a detection 96 | boxes[i,0] = boxes[maxpos,0] 97 | boxes[i,1] = boxes[maxpos,1] 98 | boxes[i,2] = boxes[maxpos,2] 99 | boxes[i,3] = boxes[maxpos,3] 100 | boxes[i,4] = boxes[maxpos,4] 101 | 102 | # swap ith box with position of max box 103 | boxes[maxpos,0] = tx1 104 | boxes[maxpos,1] = ty1 105 | boxes[maxpos,2] = tx2 106 | boxes[maxpos,3] = ty2 107 | boxes[maxpos,4] = ts 108 | 109 | tx1 = boxes[i,0] 110 | ty1 = boxes[i,1] 111 | tx2 = boxes[i,2] 112 | ty2 = boxes[i,3] 113 | ts = boxes[i,4] 114 | 115 | pos = i + 1 116 | # NMS iterations, note that N changes if detection boxes fall below threshold 117 | while pos < N: 118 | x1 = boxes[pos, 0] 119 | y1 = boxes[pos, 1] 120 | x2 = boxes[pos, 2] 121 | y2 = boxes[pos, 3] 122 | s = boxes[pos, 4] 123 | 124 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 125 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 126 | if iw > 0: 127 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 128 | if ih > 0: 129 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 130 | ov = iw * ih / ua  # iou between max box and detection box 131 | 132 | if method == 1: # linear 133 | if ov > Nt: 134 | weight = 1 - ov 135 | else: 136 | weight = 1 137 | elif method == 2: # gaussian 138 | weight = np.exp(-(ov * ov) / sigma) 139 | else: # original NMS 140 | if ov > Nt: 141 | weight = 0 142 | else: 143 | weight = 1 144 | 145 | boxes[pos, 4] = weight * boxes[pos, 4] 146 | 147 | # if box score falls below threshold, discard the box by swapping with last box 148 | # update N 149 | if boxes[pos, 4] < threshold: 150 | boxes[pos,0] = boxes[N-1, 0] 151 | boxes[pos,1] = boxes[N-1, 1] 152 | boxes[pos,2] = boxes[N-1, 2] 153 | boxes[pos,3] = boxes[N-1, 3] 154 | boxes[pos,4] = boxes[N-1, 4] 155 | N = N - 1 156 | pos = pos - 1 157 | 158 | pos = pos + 1 159 | 160 | keep = [i for i in range(N)] 161 | return keep, boxes 162 | 163 | 164 | def nms_domain(dets, dets_small, thresh_small=0.85, thresh_big=0.5): 165 | # dets = dets.cpu().numpy() 166 | # dets_small = dets_small.cpu().numpy() 167 | x1 = dets[:, 0] 168 | y1 = dets[:, 1] 169 | x2 = dets[:, 2] 170 | y2 = dets[:, 3] 171 | scores = dets[:, 4] 172 | 173 | x21 = dets_small[:, 0] 174 | y21 = dets_small[:, 1] 175 | x22 = dets_small[:, 2] 176 | y22
= dets_small[:, 3] 177 | scores2 = dets_small[:, 4] 178 | 179 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 180 | order = scores.argsort()[::-1] 181 | 182 | areas2 = (x22 - x21 + 1) * (y22 - y21 + 1) 183 | order2 = scores2.argsort()[::-1] 184 | 185 | throw = set() 186 | keep = set(list(range(len(dets_small)))) 187 | for i in range(len(dets)): 188 | xx1 = np.maximum(x1[i], x21) 189 | yy1 = np.maximum(y1[i], y21) 190 | xx2 = np.minimum(x2[i], x22) 191 | yy2 = np.minimum(y2[i], y22) 192 | 193 | w = np.maximum(0.0, xx2 - xx1 + 1) 194 | h = np.maximum(0.0, yy2 - yy1 + 1) 195 | inter = w * h 196 | 197 | ovr_1 = inter / (areas[i]) 198 | ovr_2 = inter / (areas2) 199 | 200 | throw_array = np.where((ovr_2 > thresh_small) & (ovr_1 < thresh_big))[0].tolist() 201 | throw.update(throw_array) 202 | keep = list(keep - throw) 203 | return keep 204 | 205 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_gpu.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import numpy as np 4 | from ._ext import nms 5 | import pdb 6 | 7 | def nms_gpu(dets, thresh): 8 | keep = dets.new(dets.size(0), 1).zero_().int() 9 | num_out = dets.new(1).zero_().int() 10 | nms.nms_cuda(keep, dets, num_out, thresh) 11 | keep = keep[:num_out[0]] 12 | return keep 13 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 |
dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81 | int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 |
-------------------------------------------------------------------------------- /lib/model/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 |
-------------------------------------------------------- 7 | import torch 8 | from model.utils.config import cfg 9 | if torch.cuda.is_available(): 10 | from model.nms.nms_gpu import nms_gpu 11 | from model.nms.nms_cpu import nms_cpu 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | if dets.shape[0] == 0: 16 | return [] 17 | # ---numpy version--- 18 | # original: return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | # ---pytorch version--- 20 | 21 | return nms_cpu(dets, thresh) if force_cpu else nms_gpu(dets, thresh) 22 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdio.h> 3 | #include "nms_cuda_kernel.h" 4 | 5 | // this symbol will be resolved automatically from PyTorch libs 6 | extern THCState *state; 7 | 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 9 | THCudaIntTensor *num_out, float nms_overlap_thresh) { 10 | 11 | nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 12 | THCudaIntTensor_data(state, num_out), 13 | THCudaTensor_data(state, boxes_host), 14 | THCudaTensor_size(state, boxes_host, 0), 15 | THCudaTensor_size(state, boxes_host, 1), 16 | nms_overlap_thresh); 17 | 18 | return 1; 19 | } 20 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out, 2 | // THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh); 3 | 4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host, 5 | THCudaIntTensor *num_out, float nms_overlap_thresh); 6 |
-------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include <stdio.h> 9 | #include <iostream> 10 | #include <vector> 11 | #include <cstring> 12 | #include "nms_cuda_kernel.h" 13 | 14 | #define CUDA_WARN(XXX) \ 15 | do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \ 16 | cudaGetErrorString(XXX) << ", at line " << __LINE__ \ 17 | << std::endl; cudaDeviceSynchronize(); } while (0) 18 | 19 | #define CUDA_CHECK(condition) \ 20 | /* Code block avoids redefinition of cudaError_t error */ \ 21 | do { \ 22 | cudaError_t error = condition; \ 23 | if (error != cudaSuccess) { \ 24 | std::cout << cudaGetErrorString(error) << std::endl; \ 25 | } \ 26 | } while (0) 27 | 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 30 | 31 | __device__ inline float devIoU(float const * const a, float const * const b) { 32 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 33 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 34 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 35 | float interS = width * height; 36 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 37 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 38 | return interS / (Sa + Sb - interS);
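// The "+ 1" terms above follow the inclusive integer pixel-coordinate
// convention used throughout this code: a box spanning columns x1..x2 is
// (x2 - x1 + 1) pixels wide. For example, a = [0,0,9,9] and b = [5,5,14,14]
// give interS = 5 * 5 = 25, union = 100 + 100 - 25 = 175, IoU ~ 0.143.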
39 | } 40 | 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh, 42 | float *dev_boxes, unsigned long long *dev_mask) { 43 | const int row_start = blockIdx.y; 44 | const int col_start = blockIdx.x; 45 | 46 | // if (row_start > col_start) return; 47 | 48 | const int row_size = 49 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 50 | const int col_size = 51 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 52 | 53 | __shared__ float block_boxes[threadsPerBlock * 5]; 54 | if (threadIdx.x < col_size) { 55 | block_boxes[threadIdx.x * 5 + 0] = 56 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 57 | block_boxes[threadIdx.x * 5 + 1] = 58 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 59 | block_boxes[threadIdx.x * 5 + 2] = 60 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 61 | block_boxes[threadIdx.x * 5 + 3] = 62 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 63 | block_boxes[threadIdx.x * 5 + 4] = 64 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 65 | } 66 | __syncthreads(); 67 | 68 | if (threadIdx.x < row_size) { 69 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 70 | const float *cur_box = dev_boxes + cur_box_idx * 5; 71 | int i = 0; 72 | unsigned long long t = 0; 73 | int start = 0; 74 | if (row_start == col_start) { 75 | start = threadIdx.x + 1; 76 | } 77 | for (i = start; i < col_size; i++) { 78 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 79 | t |= 1ULL << i; 80 | } 81 | } 82 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 83 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 84 | } 85 | } 86 | 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 88 | int boxes_dim, float nms_overlap_thresh) { 89 | 90 | float* boxes_dev = NULL; 91 | unsigned long long* mask_dev = NULL; 92 | 93 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 94 | 95 | CUDA_CHECK(cudaMalloc(&boxes_dev, 96 | boxes_num * boxes_dim * sizeof(float))); 97 | CUDA_CHECK(cudaMemcpy(boxes_dev, 98 | boxes_host, 99 | boxes_num * boxes_dim * sizeof(float), 100 | cudaMemcpyHostToDevice)); 101 | 102 | CUDA_CHECK(cudaMalloc(&mask_dev, 103 | boxes_num * col_blocks * sizeof(unsigned long long))); 104 | 105 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 106 | DIVUP(boxes_num, threadsPerBlock)); 107 | dim3 threads(threadsPerBlock); 108 | 109 | // printf("i am at line %d\n", boxes_num); 110 | // printf("i am at line %d\n", boxes_dim); 111 | 112 | nms_kernel<<<blocks, threads>>>(boxes_num, 113 | nms_overlap_thresh, 114 | boxes_dev, 115 | mask_dev); 116 | 117 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 118 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 119 | mask_dev, 120 | sizeof(unsigned long long) * boxes_num * col_blocks, 121 | cudaMemcpyDeviceToHost)); 122 | 123 | std::vector<unsigned long long> remv(col_blocks); 124 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 125 | 126 | // we need to create a memory for keep_out on cpu 127 | // otherwise, the following code cannot run 128 | 129 | int* keep_out_cpu = new int[boxes_num]; 130 | 131 | int num_to_keep = 0; 132 | for (int i = 0; i < boxes_num; i++) { 133 | int nblock = i / threadsPerBlock; 134 | int inblock = i % threadsPerBlock; 135 | 136 | if (!(remv[nblock] & (1ULL << inblock))) { 137 | // original: keep_out[num_to_keep++] = i; 138 | keep_out_cpu[num_to_keep++] = i; 139 | unsigned long long *p = &mask_host[0] + i * col_blocks; 140 | for (int j = nblock; j < col_blocks;
j++) { 141 | remv[j] |= p[j]; 142 | } 143 | } 144 | } 145 | 146 | // copy keep_out_cpu to keep_out on gpu 147 | CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice)); 148 | 149 | // *num_out = num_to_keep; 150 | 151 | // original: *num_out = num_to_keep; 152 | // copy num_to_keep to num_out on gpu 153 | 154 | CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice)); 155 | 156 | // release cuda memory 157 | CUDA_CHECK(cudaFree(boxes_dev)); 158 | CUDA_CHECK(cudaFree(mask_dev)); 159 | // release cpu memory 160 | delete []keep_out_cpu; 161 | } 162 | -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/nms/src/nms_cuda_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/nms/src/nms_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num, 6 | int boxes_dim, float nms_overlap_thresh); 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /lib/model/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_align/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | # sources = ['src/roi_align.c'] 7 | # headers = ['src/roi_align.h'] 8 | sources = [] 9 | headers = [] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_align_cuda.c'] 16 | headers += ['src/roi_align_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_align_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, 
fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_align', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_align 4 | 5 | 6 | # TODO use save_for_backward instead 7 | class RoIAlignFunction(Function): 8 | def __init__(self, aligned_height, aligned_width, spatial_scale): 9 | self.aligned_width = int(aligned_width) 10 | self.aligned_height = int(aligned_height) 11 | self.spatial_scale = float(spatial_scale) 12 | self.rois = None 13 | self.feature_size = None 14 | 15 | def forward(self, features, rois): 16 | self.rois = rois 17 | self.feature_size = features.size() 18 | 19 | batch_size, num_channels, data_height, data_width = features.size() 20 | num_rois = rois.size(0) 21 | 22 | output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_() 23 | if features.is_cuda: 24 | roi_align.roi_align_forward_cuda(self.aligned_height, 25 | self.aligned_width, 26 | self.spatial_scale, features, 27 | rois, output) 28 | else: 29 | raise NotImplementedError 30 | 31 | return output 32 | 33 | def backward(self, grad_output): 34 | assert(self.feature_size is not None and grad_output.is_cuda) 35 | 36 | batch_size, num_channels, data_height, data_width = self.feature_size 37 | 38 | grad_input = self.rois.new(batch_size, num_channels, data_height, 39 | data_width).zero_() 40 | roi_align.roi_align_backward_cuda(self.aligned_height, 41 | self.aligned_width, 42 | self.spatial_scale, grad_output, 43 | self.rois, grad_input) 44 | 45 | # print grad_input 46 | 47 | return grad_input, None 48 | -------------------------------------------------------------------------------- /lib/model/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.nn.functional import avg_pool2d, max_pool2d 3 | from ..functions.roi_align import RoIAlignFunction 4 | 5 | 6 | class RoIAlign(Module): 7 | def __init__(self, aligned_height, aligned_width, spatial_scale): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.aligned_width = int(aligned_width) 11 | self.aligned_height = int(aligned_height) 12 | self.spatial_scale = float(spatial_scale) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction(self.aligned_height, self.aligned_width, 16 | self.spatial_scale)(features, rois) 17 | 18 | class 
RoIAlignAvg(Module): 19 | def __init__(self, aligned_height, aligned_width, spatial_scale): 20 | super(RoIAlignAvg, self).__init__() 21 | 22 | self.aligned_width = int(aligned_width) 23 | self.aligned_height = int(aligned_height) 24 | self.spatial_scale = float(spatial_scale) 25 | 26 | def forward(self, features, rois): 27 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 28 | self.spatial_scale)(features, rois) 29 | return avg_pool2d(x, kernel_size=2, stride=1) 30 | 31 | class RoIAlignMax(Module): 32 | def __init__(self, aligned_height, aligned_width, spatial_scale): 33 | super(RoIAlignMax, self).__init__() 34 | 35 | self.aligned_width = int(aligned_width) 36 | self.aligned_height = int(aligned_height) 37 | self.spatial_scale = float(spatial_scale) 38 | 39 | def forward(self, features, rois): 40 | x = RoIAlignFunction(self.aligned_height+1, self.aligned_width+1, 41 | self.spatial_scale)(features, rois) 42 | return max_pool2d(x, kernel_size=2, stride=1) 43 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // data height 25 | int data_height = THCudaTensor_size(state, features, 2); 26 | // data width 27 | int data_width = THCudaTensor_size(state, features, 3); 28 | // Number of channels 29 | int num_channels = THCudaTensor_size(state, features, 1); 30 | 31 | cudaStream_t stream = THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width,
num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include <stdio.h> 6 | #include <math.h> 7 | #include <float.h> 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, 16 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) { 17 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 18 | // (n, c, ph, pw) is an element in the aligned output 19 | // int n = index; 20 | // int pw = n % aligned_width; 21 | // n /= aligned_width; 22 | // int ph = n % aligned_height; 23 | // n /= aligned_height; 24 | // int c = n % channels; 25 | // n /= channels; 26 | 27 | int pw = index % aligned_width; 28 | int ph = (index / aligned_width) % aligned_height; 29 | int c = (index / aligned_width / aligned_height) % channels; 30 | int n = index / aligned_width / aligned_height / channels; 31 | 32 | // bottom_rois += n * 5; 33 | float roi_batch_ind = bottom_rois[n * 5 + 0]; 34 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale; 35 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale; 36 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale; 37 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale; 38 | 39 | // Force malformed ROIs to be 1x1 40 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 41 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 42 | float bin_size_h = roi_height / (aligned_height - 1.); 43 | float bin_size_w = roi_width / (aligned_width - 1.); 44 | 45 | float h = (float)(ph) * bin_size_h + roi_start_h; 46 | float w = (float)(pw) * bin_size_w + roi_start_w; 47 | 48 | int hstart = fminf(floor(h), height - 2); 49 | int wstart = fminf(floor(w), width - 2); 50 | 51 | int img_start = roi_batch_ind * channels * height * width; 52 | 53 | // bilinear interpolation 54 | if (h < 0 || h >= height || w < 0 || w >= width) { 55 | top_data[index] = 0.; 56 | } else { 57 | float h_ratio = h - (float)(hstart); 58 | float w_ratio = w - (float)(wstart); 59 | int upleft = img_start + (c * height + hstart) * width + wstart; 60 | int upright = upleft + 1; 61 | int downleft = upleft + width; 62 | int downright = downleft + 1; 63 | 64 | top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio) 65 | + bottom_data[upright] * (1. - h_ratio) * w_ratio 66 | + bottom_data[downleft] * h_ratio * (1.
- w_ratio) 67 | + bottom_data[downright] * h_ratio * w_ratio; 68 | } 69 | } 70 | } 71 | 72 | 73 | int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, 74 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) { 75 | const int kThreadsPerBlock = 1024; 76 | const int output_size = num_rois * aligned_height * aligned_width * channels; 77 | cudaError_t err; 78 | 79 | 80 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 81 | output_size, bottom_data, spatial_scale, height, width, channels, 82 | aligned_height, aligned_width, bottom_rois, top_data); 83 | 84 | err = cudaGetLastError(); 85 | if(cudaSuccess != err) { 86 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 87 | exit( -1 ); 88 | } 89 | 90 | return 1; 91 | } 92 | 93 | 94 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, 95 | const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) { 96 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 97 | 98 | // (n, c, ph, pw) is an element in the aligned output 99 | int pw = index % aligned_width; 100 | int ph = (index / aligned_width) % aligned_height; 101 | int c = (index / aligned_width / aligned_height) % channels; 102 | int n = index / aligned_width / aligned_height / channels; 103 | 104 | float roi_batch_ind = bottom_rois[n * 5 + 0]; 105 | float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale; 106 | float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale; 107 | float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale; 108 | float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale; 109 | /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */ 110 | /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */ 111 | /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */ 112 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */ 113 | 114 | // Force malformed ROIs to be 1x1 115 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 116 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 117 | float bin_size_h = roi_height / (aligned_height - 1.); 118 | float bin_size_w = roi_width / (aligned_width - 1.); 119 | 120 | float h = (float)(ph) * bin_size_h + roi_start_h; 121 | float w = (float)(pw) * bin_size_w + roi_start_w; 122 | 123 | int hstart = fminf(floor(h), height - 2); 124 | int wstart = fminf(floor(w), width - 2); 125 | 126 | int img_start = roi_batch_ind * channels * height * width; 127 | 128 | // bilinear interpolation 129 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 130 | float h_ratio = h - (float)(hstart); 131 | float w_ratio = w - (float)(wstart); 132 | int upleft = img_start + (c * height + hstart) * width + wstart; 133 | int upright = upleft + 1; 134 | int downleft = upleft + width; 135 | int downright = downleft + 1; 136 | 137 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio)); 138 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. 
- h_ratio) * w_ratio); 139 | atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio)); 140 | atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio); 141 | } 142 | } 143 | } 144 | 145 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, 146 | const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) { 147 | const int kThreadsPerBlock = 1024; 148 | const int output_size = num_rois * aligned_height * aligned_width * channels; 149 | cudaError_t err; 150 | 151 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 152 | output_size, top_diff, spatial_scale, height, width, channels, 153 | aligned_height, aligned_width, bottom_diff, bottom_rois); 154 | 155 | err = cudaGetLastError(); 156 | if(cudaSuccess != err) { 157 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 158 | exit( -1 ); 159 | } 160 | 161 | return 1; 162 | } 163 | 164 | 165 | #ifdef __cplusplus 166 | } 167 | #endif 168 | -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_align/src/roi_align_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/roi_align/src/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | #ifdef __cplusplus 30 | } 31 | #endif 32 | 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /lib/model/roi_crop/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/__init__.py -------------------------------------------------------------------------------- 
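Taken together, the roi_align package above exposes three Module wrappers over the same CUDA kernel: RoIAlign samples an aligned_height x aligned_width grid directly, while RoIAlignAvg and RoIAlignMax ask the kernel for one extra row and column and then reduce with a stride-1 avg/max pool, which is why the +1 appears in their forward methods. A minimal usage sketch (CUDA-only, since the CPU branch raises NotImplementedError; the stride-16 spatial_scale, the tensor shapes, and having lib/ on sys.path are illustrative assumptions, not fixed by the code above):

    import torch
    from model.roi_align.modules.roi_align import RoIAlignAvg

    align = RoIAlignAvg(7, 7, 1.0 / 16.0)          # 7x7 bins over a stride-16 feature map
    features = torch.randn(1, 512, 38, 50).cuda()  # (B, C, H, W) backbone features
    # one RoI per row: (batch_index, x1, y1, x2, y2) in input-image coordinates
    rois = torch.cuda.FloatTensor([[0, 48, 48, 320, 240]])
    pooled = align(features, rois)                 # -> (num_rois, 512, 7, 7)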
/lib/model/roi_crop/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/_ext/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/_ext/roi_crop/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_crop import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_crop/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | #this_file = os.path.dirname(__file__) 7 | 8 | sources = ['src/roi_crop.c'] 9 | headers = ['src/roi_crop.h'] 10 | defines = [] 11 | with_cuda = False 12 | 13 | if torch.cuda.is_available(): 14 | print('Including CUDA code.') 15 | sources += ['src/roi_crop_cuda.c'] 16 | headers += ['src/roi_crop_cuda.h'] 17 | defines += [('WITH_CUDA', None)] 18 | with_cuda = True 19 | 20 | this_file = os.path.dirname(os.path.realpath(__file__)) 21 | print(this_file) 22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o'] 23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 24 | 25 | ffi = create_extension( 26 | '_ext.roi_crop', 27 | headers=headers, 28 | sources=sources, 29 | define_macros=defines, 30 | relative_to=__file__, 31 | with_cuda=with_cuda, 32 | extra_objects=extra_objects 33 | ) 34 | 35 | if __name__ == '__main__': 36 | ffi.build() 37 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/crop_resize.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | from cffi import FFI 6 | ffi = FFI() 7 | 8 | class RoICropFunction(Function): 9 | def forward(self, input1, input2): 10 | self.input1 = input1 11 | self.input2 = input2 12 | self.device_c = ffi.new("int *") 13 | output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]) 14 | #print('device %d' % torch.cuda.current_device()) 15 | if input1.is_cuda: 16 | self.device = torch.cuda.current_device() 17 | else: 18 | self.device = -1 19 | self.device_c[0] = self.device 20 | if not input1.is_cuda: 21 | roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output) 22 | else: 23 | output = output.cuda(self.device) 24 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 25 | return output 26 | 27 | def backward(self, grad_output): 28 | grad_input1 =
torch.zeros(self.input1.size()) 29 | grad_input2 = torch.zeros(self.input2.size()) 30 | #print('backward device %d' % self.device) 31 | if not grad_output.is_cuda: 32 | roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output) 33 | else: 34 | grad_input1 = grad_input1.cuda(self.device) 35 | grad_input2 = grad_input2.cuda(self.device) 36 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 37 | return grad_input1, grad_input2 38 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/gridgen.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | import numpy as np 5 | 6 | 7 | class AffineGridGenFunction(Function): 8 | def __init__(self, height, width, lr=1): 9 | super(AffineGridGenFunction, self).__init__() 10 | self.lr = lr 11 | self.height, self.width = height, width 12 | self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32) 13 | self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0) 14 | self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0) 15 | # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0) 16 | # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0) 17 | self.grid[:,:,2] = np.ones([self.height, width]) 18 | self.grid = torch.from_numpy(self.grid.astype(np.float32)) 19 | #print(self.grid) 20 | 21 | def forward(self, input1): 22 | self.input1 = input1 23 | output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 24 | self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_() 25 | for i in range(input1.size(0)): 26 | self.batchgrid[i] = self.grid # torch tensors have no astype(); copy the precomputed base grid into each batch slot 27 | 28 | # if input1.is_cuda: 29 | # self.batchgrid = self.batchgrid.cuda() 30 | # output = output.cuda() 31 | 32 | for i in range(input1.size(0)): 33 | output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2) 34 | 35 | return output 36 | 37 | def backward(self, grad_output): 38 | 39 | grad_input1 = self.input1.new(self.input1.size()).zero_() 40 | 41 | # if grad_output.is_cuda: 42 | # self.batchgrid = self.batchgrid.cuda() 43 | # grad_input1 = grad_input1.cuda() 44 | 45 | grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3)) 46 | return grad_input1 47 | -------------------------------------------------------------------------------- /lib/model/roi_crop/functions/roi_crop.py: -------------------------------------------------------------------------------- 1 | # functions/add.py 2 | import torch 3 | from torch.autograd import Function 4 | from .._ext import roi_crop 5 | import pdb 6 | 7 | class RoICropFunction(Function): 8 | def forward(self, input1, input2): 9 | self.input1 = input1.clone() 10 | self.input2 = input2.clone() 11 | output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_() 12 | assert
output.get_device() == input1.get_device(), "output and input1 must be on the same device" 13 | assert output.get_device() == input2.get_device(), "output and input2 must be on the same device" 14 | roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output) 15 | return output 16 | 17 | def backward(self, grad_output): 18 | grad_input1 = self.input1.new(self.input1.size()).zero_() 19 | grad_input2 = self.input2.new(self.input2.size()).zero_() 20 | roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output) 21 | return grad_input1, grad_input2 22 | -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_crop/modules/roi_crop.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_crop import RoICropFunction 3 | 4 | class _RoICrop(Module): 5 | def __init__(self, layout = 'BHWD'): 6 | super(_RoICrop, self).__init__() 7 | def forward(self, input1, input2): 8 | return RoICropFunction()(input1, input2) 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop.h: -------------------------------------------------------------------------------- 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 2 | 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 4 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 5 | 6 | 7 | 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output); 9 | 10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages, 11 | THFloatTensor *gradGrids, THFloatTensor *gradOutput); 12 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <stdbool.h> 3 | #include <stdio.h> 4 | #include "roi_crop_cuda_kernel.h" 5 | 6 | #define real float 7 | 8 | // this symbol will be resolved automatically from PyTorch libs 9 | extern THCState *state; 10 | 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 12 | // we assume BHWD format in inputImages 13 | // we assume BHW(YX) format on grids 14 | 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){ 16 | // THCState *state = getCutorchState(L); 17 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 18 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 19 | // THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 20 | 21 | int success = 0; 22 | success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1], 23 | output->size[3], 24 | output->size[2], 25 | output->size[0], 26 | THCudaTensor_size(state, inputImages, 1), 27 | THCudaTensor_size(state,
inputImages, 2), 28 | THCudaTensor_size(state, inputImages, 3), 29 | THCudaTensor_size(state, inputImages, 0), 30 | THCudaTensor_data(state, inputImages), 31 | THCudaTensor_stride(state, inputImages, 0), 32 | THCudaTensor_stride(state, inputImages, 1), 33 | THCudaTensor_stride(state, inputImages, 2), 34 | THCudaTensor_stride(state, inputImages, 3), 35 | THCudaTensor_data(state, grids), 36 | THCudaTensor_stride(state, grids, 0), 37 | THCudaTensor_stride(state, grids, 3), 38 | THCudaTensor_stride(state, grids, 1), 39 | THCudaTensor_stride(state, grids, 2), 40 | THCudaTensor_data(state, output), 41 | THCudaTensor_stride(state, output, 0), 42 | THCudaTensor_stride(state, output, 1), 43 | THCudaTensor_stride(state, output, 2), 44 | THCudaTensor_stride(state, output, 3), 45 | THCState_getCurrentStream(state)); 46 | 47 | //check for errors 48 | if (!success) { 49 | THError("aborting"); 50 | } 51 | return 1; 52 | } 53 | 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 55 | THCudaTensor *gradGrids, THCudaTensor *gradOutput) 56 | { 57 | // THCState *state = getCutorchState(L); 58 | // THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor"); 59 | // THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor"); 60 | // THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor"); 61 | // THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor"); 62 | // THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor"); 63 | 64 | int success = 0; 65 | success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1], 66 | gradOutput->size[3], 67 | gradOutput->size[2], 68 | gradOutput->size[0], 69 | THCudaTensor_size(state, inputImages, 1), 70 | THCudaTensor_size(state, inputImages, 2), 71 | THCudaTensor_size(state, inputImages, 3), 72 | THCudaTensor_size(state, inputImages, 0), 73 | THCudaTensor_data(state, inputImages), 74 | THCudaTensor_stride(state, inputImages, 0), 75 | THCudaTensor_stride(state, inputImages, 1), 76 | THCudaTensor_stride(state, inputImages, 2), 77 | THCudaTensor_stride(state, inputImages, 3), 78 | THCudaTensor_data(state, grids), 79 | THCudaTensor_stride(state, grids, 0), 80 | THCudaTensor_stride(state, grids, 3), 81 | THCudaTensor_stride(state, grids, 1), 82 | THCudaTensor_stride(state, grids, 2), 83 | THCudaTensor_data(state, gradInputImages), 84 | THCudaTensor_stride(state, gradInputImages, 0), 85 | THCudaTensor_stride(state, gradInputImages, 1), 86 | THCudaTensor_stride(state, gradInputImages, 2), 87 | THCudaTensor_stride(state, gradInputImages, 3), 88 | THCudaTensor_data(state, gradGrids), 89 | THCudaTensor_stride(state, gradGrids, 0), 90 | THCudaTensor_stride(state, gradGrids, 3), 91 | THCudaTensor_stride(state, gradGrids, 1), 92 | THCudaTensor_stride(state, gradGrids, 2), 93 | THCudaTensor_data(state, gradOutput), 94 | THCudaTensor_stride(state, gradOutput, 0), 95 | THCudaTensor_stride(state, gradOutput, 1), 96 | THCudaTensor_stride(state, gradOutput, 2), 97 | THCudaTensor_stride(state, gradOutput, 3), 98 | THCState_getCurrentStream(state)); 99 | 100 | //check for errors 101 | if (!success) { 102 | THError("aborting"); 103 | } 104 | return 1; 105 | } 106 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda.h: -------------------------------------------------------------------------------- 1 | // 
Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW) 2 | // we assume BHWD format in inputImages 3 | // we assume BHW(YX) format on grids 4 | 5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output); 6 | 7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages, 8 | THCudaTensor *gradGrids, THCudaTensor *gradOutput); 9 | -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_crop/src/roi_crop_cuda_kernel.cu.o -------------------------------------------------------------------------------- /lib/model/roi_crop/src/roi_crop_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc, 7 | /*output->size[2]*/int ow, 8 | /*output->size[1]*/int oh, 9 | /*output->size[0]*/int ob, 10 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 11 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 12 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 13 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 14 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 15 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 16 | /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw, 17 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 18 | 19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc, 20 | /*gradOutput->size[2]*/int gow, 21 | /*gradOutput->size[1]*/int goh, 22 | /*gradOutput->size[0]*/int gob, 23 | /*THCudaTensor_size(state, inputImages, 3)*/int ic, 24 | /*THCudaTensor_size(state, inputImages, 1)*/int ih, 25 | /*THCudaTensor_size(state, inputImages, 2)*/int iw, 26 | /*THCudaTensor_size(state, inputImages, 0)*/int ib, 27 | /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw, 28 | /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw, 29 | /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw, 30 | /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw, 31 | /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw, 32 | /*THCState_getCurrentStream(state)*/cudaStream_t stream); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/_ext/__init__.py 
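Before the roi_pooling sources below: the roi_crop package just listed is a spatial-transformer-style bilinear sampler rather than an RoI pooler. Per the header comments it expects sampling grids in BHW(YX) format with coordinates normalized to [-1, 1]; note, though, that the BHWD comments and the size/stride arguments actually passed in roi_crop_cuda.c do not line up cleanly, so treat the layout in this shape-level sketch as an assumption read off the Python wrapper, not a tested contract:

    import torch
    from model.roi_crop.modules.roi_crop import _RoICrop

    crop = _RoICrop()
    feats = torch.randn(2, 512, 14, 14).cuda()  # the wrapper indexes dim 1 as channels
    grid = torch.zeros(2, 7, 7, 2).cuda()       # (B, H_out, W_out, (y, x)) in [-1, 1]
    out = crop(feats, grid)                     # -> (2, 512, 7, 7) bilinear crops

In the detector such a grid would come from an affine grid generator driven by the RoI coordinates (see functions/gridgen.py above).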
-------------------------------------------------------------------------------- /lib/model/roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | from torch.utils.ffi import create_extension 5 | 6 | 7 | sources = ['src/roi_pooling.c'] 8 | headers = ['src/roi_pooling.h'] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_pooling_cuda.c'] 15 | headers += ['src/roi_pooling_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_pooling.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_pooling', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/functions/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import roi_pooling 4 | import pdb 5 | 6 | class RoIPoolFunction(Function): 7 | def __init__(ctx, pooled_height, pooled_width, spatial_scale): 8 | ctx.pooled_width = pooled_width 9 | ctx.pooled_height = pooled_height 10 | ctx.spatial_scale = spatial_scale 11 | ctx.feature_size = None 12 | 13 | def forward(ctx, features, rois): 14 | ctx.feature_size = features.size() 15 | batch_size, num_channels, data_height, data_width = ctx.feature_size 16 | num_rois = rois.size(0) 17 | output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_() 18 | ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int() 19 | ctx.rois = rois 20 | if not features.is_cuda: 21 | _features = features.permute(0, 2, 3, 1) 22 | roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 23 | _features, rois, output) 24 | else: 25 | roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 26 | features, rois, output, ctx.argmax) 27 | 28 | return output 29 | 30 | def backward(ctx, grad_output): 31 | assert(ctx.feature_size is not None and 
grad_output.is_cuda) 32 | batch_size, num_channels, data_height, data_width = ctx.feature_size 33 | grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_() 34 | 35 | roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, 36 | grad_output, ctx.rois, grad_input, ctx.argmax) 37 | 38 | return grad_input, None 39 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/modules/__init__.py -------------------------------------------------------------------------------- /lib/model/roi_pooling/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 15 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | #include <math.h> 3 | 4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output) 6 | { 7 | // Grab the input tensor 8 | float * data_flat = THFloatTensor_data(features); 9 | float * rois_flat = THFloatTensor_data(rois); 10 | 11 | float * output_flat = THFloatTensor_data(output); 12 | 13 | // Number of ROIs 14 | int num_rois = THFloatTensor_size(rois, 0); 15 | int size_rois = THFloatTensor_size(rois, 1); 16 | // batch size 17 | int batch_size = THFloatTensor_size(features, 0); 18 | if(batch_size != 1) 19 | { 20 | return 0; 21 | } 22 | // data height 23 | int data_height = THFloatTensor_size(features, 1); 24 | // data width 25 | int data_width = THFloatTensor_size(features, 2); 26 | // Number of channels 27 | int num_channels = THFloatTensor_size(features, 3); 28 | 29 | // Initialize all output elements to -1 (a stand-in for -inf before the per-bin maxima below; safe for non-negative ReLU features).
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1); 31 | 32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R 33 | int index_roi = 0; 34 | int index_output = 0; 35 | int n; 36 | for (n = 0; n < num_rois; ++n) 37 | { 38 | int roi_batch_ind = rois_flat[index_roi + 0]; 39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale); 40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale); 41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale); 42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale); 43 | // CHECK_GE(roi_batch_ind, 0); 44 | // CHECK_LT(roi_batch_ind, batch_size); 45 | 46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1); 47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1); 48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 50 | 51 | int index_data = roi_batch_ind * data_height * data_width * num_channels; 52 | const int output_area = pooled_width * pooled_height; 53 | 54 | int c, ph, pw; 55 | for (ph = 0; ph < pooled_height; ++ph) 56 | { 57 | for (pw = 0; pw < pooled_width; ++pw) 58 | { 59 | int hstart = (floor((float)(ph) * bin_size_h)); 60 | int wstart = (floor((float)(pw) * bin_size_w)); 61 | int hend = (ceil((float)(ph + 1) * bin_size_h)); 62 | int wend = (ceil((float)(pw + 1) * bin_size_w)); 63 | 64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height); 65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height); 66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width); 67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width); 68 | 69 | const int pool_index = index_output + (ph * pooled_width + pw); 70 | int is_empty = (hend <= hstart) || (wend <= wstart); 71 | if (is_empty) 72 | { 73 | for (c = 0; c < num_channels * output_area; c += output_area) 74 | { 75 | output_flat[pool_index + c] = 0; 76 | } 77 | } 78 | else 79 | { 80 | int h, w, c; 81 | for (h = hstart; h < hend; ++h) 82 | { 83 | for (w = wstart; w < wend; ++w) 84 | { 85 | for (c = 0; c < num_channels; ++c) 86 | { 87 | const int index = (h * data_width + w) * num_channels + c; 88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area]) 89 | { 90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index]; 91 | } 92 | } 93 | } 94 | } 95 | } 96 | } 97 | } 98 | 99 | // Increment ROI index 100 | index_roi += size_rois; 101 | index_output += pooled_height * pooled_width * num_channels; 102 | } 103 | return 1; 104 | } -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/roi_pooling/src/roi_pooling.cu.o -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include <math.h> 3 | #include "roi_pooling_kernel.h"
4 | 5 | extern THCState *state; 6 | 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 16 | 17 | // Number of ROIs 18 | int num_rois = THCudaTensor_size(state, rois, 0); 19 | int size_rois = THCudaTensor_size(state, rois, 1); 20 | if (size_rois != 5) 21 | { 22 | return 0; 23 | } 24 | 25 | // batch size 26 | // int batch_size = THCudaTensor_size(state, features, 0); 27 | // if (batch_size != 1) 28 | // { 29 | // return 0; 30 | // } 31 | // data height 32 | int data_height = THCudaTensor_size(state, features, 2); 33 | // data width 34 | int data_width = THCudaTensor_size(state, features, 3); 35 | // Number of channels 36 | int num_channels = THCudaTensor_size(state, features, 1); 37 | 38 | cudaStream_t stream = THCState_getCurrentStream(state); 39 | 40 | ROIPoolForwardLaucher( 41 | data_flat, spatial_scale, num_rois, data_height, 42 | data_width, num_channels, pooled_height, 43 | pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /lib/model/roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- 
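The kernel header below completes the roi_pooling package: classic quantized max pooling over each RoI, with the argmax indices cached on the forward pass so the backward pass can scatter gradients (unlike roi_align, which re-derives its bilinear weights). Usage mirrors the RoIAlign sketch earlier, with the same illustrative shapes and stride-16 assumption:

    import torch
    from model.roi_pooling.modules.roi_pool import _RoIPooling

    pool = _RoIPooling(7, 7, 1.0 / 16.0)
    features = torch.randn(1, 512, 38, 50).cuda()
    rois = torch.cuda.FloatTensor([[0, 48, 48, 320, 240]])
    pooled = pool(features, rois)               # -> (num_rois, 512, 7, 7)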
/lib/model/roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | 9 | import numpy as np 10 | import pdb 11 | 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 13 | # 14 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 15 | # >> anchors 16 | # 17 | # anchors = 18 | # 19 | # -83 -39 100 56 20 | # -175 -87 192 104 21 | # -359 -183 376 200 22 | # -55 -55 72 72 23 | # -119 -119 136 136 24 | # -247 -247 264 264 25 | # -35 -79 52 96 26 | # -79 -167 96 184 27 | # -167 -343 184 360 28 | 29 | #array([[ -83., -39., 100., 56.], 30 | # [-175., -87., 192., 104.], 31 | # [-359., -183., 376., 200.], 32 | # [ -55., -55., 72., 72.], 33 | # [-119., -119., 136., 136.], 34 | # [-247., -247., 264., 264.], 35 | # [ -35., -79., 52., 96.], 36 | # [ -79., -167., 96., 184.], 37 | # [-167., -343., 184., 360.]]) 38 | 39 | try: 40 | xrange # Python 2 41 | except NameError: 42 | xrange = range # Python 3 43 | 44 | 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 46 | scales=2**np.arange(3, 6)): 47 | """ 48 | Generate anchor (reference) windows by enumerating aspect ratios X 49 | scales wrt a reference (0, 0, 15, 15) window. 50 | """ 51 | 52 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 53 | ratio_anchors = _ratio_enum(base_anchor, ratios) 54 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 55 | for i in xrange(ratio_anchors.shape[0])]) 56 | return anchors 57 | 58 | def _whctrs(anchor): 59 | """ 60 | Return width, height, x center, and y center for an anchor (window). 
61 | """ 62 | 63 | w = anchor[2] - anchor[0] + 1 64 | h = anchor[3] - anchor[1] + 1 65 | x_ctr = anchor[0] + 0.5 * (w - 1) 66 | y_ctr = anchor[1] + 0.5 * (h - 1) 67 | return w, h, x_ctr, y_ctr 68 | 69 | def _mkanchors(ws, hs, x_ctr, y_ctr): 70 | """ 71 | Given a vector of widths (ws) and heights (hs) around a center 72 | (x_ctr, y_ctr), output a set of anchors (windows). 73 | """ 74 | 75 | ws = ws[:, np.newaxis] 76 | hs = hs[:, np.newaxis] 77 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 78 | y_ctr - 0.5 * (hs - 1), 79 | x_ctr + 0.5 * (ws - 1), 80 | y_ctr + 0.5 * (hs - 1))) 81 | return anchors 82 | 83 | def _ratio_enum(anchor, ratios): 84 | """ 85 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | size = w * h 90 | size_ratios = size / ratios 91 | ws = np.round(np.sqrt(size_ratios)) 92 | hs = np.round(ws * ratios) 93 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 94 | return anchors 95 | 96 | def _scale_enum(anchor, scales): 97 | """ 98 | Enumerate a set of anchors for each scale wrt an anchor. 99 | """ 100 | 101 | w, h, x_ctr, y_ctr = _whctrs(anchor) 102 | ws = w * scales 103 | hs = h * scales 104 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 105 | return anchors 106 | 107 | if __name__ == '__main__': 108 | import time 109 | t = time.time() 110 | a = generate_anchors() 111 | print(time.time() - t) 112 | print(a) 113 | from IPython import embed; embed() 114 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from model.nms.nms_wrapper import nms 21 | 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it euqal over batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | for i in range(batch_size): 129 | # # 3. remove predicted boxes with either height or width < threshold 130 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 131 | proposals_single = proposals_keep[i] 132 | scores_single = scores_keep[i] 133 | 134 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 135 | # # 5. take top pre_nms_topN (e.g. 6000) 136 | order_single = order[i] 137 | 138 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 139 | order_single = order_single[:pre_nms_topN] 140 | 141 | proposals_single = proposals_single[order_single, :] 142 | scores_single = scores_single[order_single].view(-1,1) 143 | 144 | # 6. apply nms (e.g. threshold = 0.7) 145 | # 7. take after_nms_topN (e.g. 300) 146 | # 8. return the top proposals (-> RoIs top) 147 | 148 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh) 149 | keep_idx_i = keep_idx_i.long().view(-1) 150 | 151 | if post_nms_topN > 0: 152 | keep_idx_i = keep_idx_i[:post_nms_topN] 153 | proposals_single = proposals_single[keep_idx_i, :] 154 | scores_single = scores_single[keep_idx_i, :] 155 | 156 | # padding 0 at the end. 
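# NOTE (annotation): 'output' was preallocated above as zeros of shape
# (batch_size, post_nms_topN, 5), so rows past num_proposal simply remain zero;
# column 0 carries the image's batch index for the downstream RoI layers.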
157 | num_proposal = proposals_single.size(0) 158 | output[i,:,0] = i 159 | output[i,:num_proposal,1:] = proposals_single 160 | 161 | return output 162 | 163 | def backward(self, top, propagate_down, bottom): 164 | """This layer does not propagate gradients.""" 165 | pass 166 | 167 | def reshape(self, bottom, top): 168 | """Reshaping happens during the call to forward.""" 169 | pass 170 | 171 | def _filter_boxes(self, boxes, min_size): 172 | """Remove all boxes with any side smaller than min_size.""" 173 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 174 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 175 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 176 | return keep 177 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer_region.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from model.nms.nms_wrapper import nms 21 | 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it equal across the batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | output_cls_score = scores.new(batch_size, post_nms_topN, 2).zero_() 129 | for i in range(batch_size): 130 | # # 3. remove predicted boxes with either height or width < threshold 131 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 132 | proposals_single = proposals_keep[i] 133 | scores_single = scores_keep[i] 134 | 135 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 136 | # # 5. take top pre_nms_topN (e.g. 6000) 137 | order_single = order[i] 138 | 139 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 140 | order_single = order_single[:pre_nms_topN] 141 | 142 | proposals_single = proposals_single[order_single, :] 143 | scores_single = scores_single[order_single].view(-1,1) 144 | 145 | # 6. apply nms (e.g. threshold = 0.7) 146 | # 7. take after_nms_topN (e.g. 300) 147 | # 8. return the top proposals (-> RoIs top) 148 | 149 | keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh) 150 | keep_idx_i = keep_idx_i.long().view(-1) 151 | 152 | if post_nms_topN > 0: 153 | keep_idx_i = keep_idx_i[:post_nms_topN] 154 | proposals_single = proposals_single[keep_idx_i, :] 155 | scores_single = scores_single[keep_idx_i, :] 156 | 157 | # pad with zeros at the end.
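# As in proposal_layer.py, rows beyond num_proposal stay zero-padded; the
# region variant additionally returns the post-NMS objectness score of each
# kept box in output_cls_score[i, :, 1], with the batch index in column 0.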
158 | num_proposal = proposals_single.size(0) 159 | output[i,:,0] = i 160 | output[i,:num_proposal,1:] = proposals_single 161 | output_cls_score[i,:,0] = i 162 | output_cls_score[i,:num_proposal,1] = scores_single 163 | 164 | return output, output_cls_score 165 | 166 | def backward(self, top, propagate_down, bottom): 167 | """This layer does not propagate gradients.""" 168 | pass 169 | 170 | def reshape(self, bottom, top): 171 | """Reshaping happens during the call to forward.""" 172 | pass 173 | 174 | def _filter_boxes(self, boxes, min_size): 175 | """Remove all boxes with any side smaller than min_size.""" 176 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 177 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 178 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 179 | return keep 180 | -------------------------------------------------------------------------------- /lib/model/rpn/rpn_region.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.autograd import Variable 6 | 7 | from model.utils.config import cfg 8 | from .proposal_layer_region import _ProposalLayer 9 | from .anchor_target_layer import _AnchorTargetLayer 10 | from model.utils.net_utils import _smooth_l1_loss 11 | 12 | import numpy as np 13 | import math 14 | import pdb 15 | import time 16 | 17 | class _RPN(nn.Module): 18 | """ region proposal network """ 19 | def __init__(self, din): 20 | super(_RPN, self).__init__() 21 | 22 | self.din = din # get depth of input feature map, e.g., 512 23 | self.anchor_scales = cfg.ANCHOR_SCALES 24 | self.anchor_ratios = cfg.ANCHOR_RATIOS 25 | self.feat_stride = cfg.FEAT_STRIDE[0] 26 | 27 | # define the convrelu layers processing input feature map 28 | self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 29 | 30 | # define bg/fg classification score layer 31 | self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors) 32 | self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0) 33 | 34 | # define anchor box offset prediction layer 35 | self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors) 36 | self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0) 37 | 38 | # define proposal layer 39 | self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 40 | 41 | # define anchor target layer 42 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios) 43 | 44 | self.rpn_loss_cls = 0 45 | self.rpn_loss_box = 0 46 | 47 | @staticmethod 48 | def reshape(x, d): 49 | input_shape = x.size() 50 | x = x.view( 51 | input_shape[0], 52 | int(d), 53 | int(float(input_shape[1] * input_shape[2]) / float(d)), 54 | input_shape[3] 55 | ) 56 | return x 57 | 58 | def forward(self, base_feat, im_info, gt_boxes, num_boxes): 59 | 60 | batch_size = base_feat.size(0) 61 | 62 | # return feature map after convrelu layer 63 | rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) 64 | # get rpn classification score 65 | rpn_cls_score = self.RPN_cls_score(rpn_conv1) 66 | 67 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 68 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape,dim=1) 69 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 70 | 71 | # get rpn offsets to the anchor boxes 72 | rpn_bbox_pred = 
self.RPN_bbox_pred(rpn_conv1) 73 | 74 | # proposal layer 75 | cfg_key = 'TRAIN' if self.training else 'TEST' 76 | 77 | rois, output_cls_score= self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, 78 | im_info, cfg_key)) 79 | 80 | self.rpn_loss_cls = 0 81 | self.rpn_loss_box = 0 82 | 83 | # generating training labels and build the rpn loss 84 | if self.training: 85 | assert gt_boxes is not None 86 | 87 | rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes)) 88 | 89 | # compute classification loss 90 | rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2) 91 | rpn_label = rpn_data[0].view(batch_size, -1) 92 | 93 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) 94 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep) 95 | rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data) 96 | rpn_label = Variable(rpn_label.long()) 97 | self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) 98 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 99 | 100 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:] 101 | 102 | # compute bbox regression loss 103 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) 104 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) 105 | rpn_bbox_targets = Variable(rpn_bbox_targets) 106 | 107 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, 108 | rpn_bbox_outside_weights, sigma=3, dim=[1,2,3]) 109 | 110 | return rois, output_cls_score, self.rpn_loss_cls, self.rpn_loss_box 111 | 112 | 113 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YoungXIAO13/FewShotDetection/b703ae7e341e1790923c6a910a6572fe2510b1f2/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | """ 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | 
cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | # from scipy.misc import imread, imresize 12 | import cv2 13 | 14 | try: 15 | xrange # Python 2 16 | except NameError: 17 | xrange = range # Python 3 18 | 19 | 20 | def im_list_to_blob(ims): 21 | """Convert a list of images into a network input. 22 | 23 | Assumes images are already prepared (means subtracted, BGR order, ...). 
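For example, inputs of shapes (480, 640, 3) and (600, 800, 3) yield a blob of shape (2, 600, 800, 3); each image is copied into the top-left corner and the remaining entries stay zero.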
24 | """ 25 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 26 | num_images = len(ims) 27 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 28 | dtype=np.float32) 29 | for i in xrange(num_images): 30 | im = ims[i] 31 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 32 | 33 | return blob 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | im_size_min = np.min(im_shape[0:2]) 43 | im_size_max = np.max(im_shape[0:2]) 44 | im_scale = float(target_size) / float(im_size_min) 45 | # Prevent the biggest axis from being more than MAX_SIZE 46 | # if np.round(im_scale * im_size_max) > max_size: 47 | # im_scale = float(max_size) / float(im_size_max) 48 | # im = imresize(im, im_scale) 49 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 50 | interpolation=cv2.INTER_LINEAR) 51 | 52 | return im, im_scale 53 | -------------------------------------------------------------------------------- /lib/pycocotools/UPSTREAM_REV: -------------------------------------------------------------------------------- 1 | https://github.com/pdollar/coco/commit/3ac47c77ebd5a1ed4254a98b7fbf2ef4765a3574 2 | -------------------------------------------------------------------------------- /lib/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /lib/pycocotools/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Piotr Dollar and Tsung-Yi Lin 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /lib/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | from . import _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criterion. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criterion above for the iou. 67 | #
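# A minimal sketch (not part of the toolbox) of how the RLE counts above are
# produced for a binary vector; it reproduces the M=[0 0 1 1 1 0 1] -> [2 3 1 1]
# and M=[1 1 1 1 1 1 0] -> [0 6 1] examples (a leading zero count is emitted
# whenever the vector starts with a one):
#
#   def rle_counts(m):
#       counts, prev, run = [], 0, 0
#       for v in m:
#           if v == prev:
#               run += 1
#           else:
#               counts.append(run)
#               prev, run = v, 1
#       counts.append(run)
#       return counts   # rle_counts([0, 0, 1, 1, 1, 0, 1]) == [2, 3, 1, 1]
#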
68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | encode = _mask.encode 77 | decode = _mask.decode 78 | iou = _mask.iou 79 | merge = _mask.merge 80 | area = _mask.area 81 | toBbox = _mask.toBbox 82 | frPyObjects = _mask.frPyObjects -------------------------------------------------------------------------------- /lib/pycocotools/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | #include <stdbool.h> 9 | 10 | typedef unsigned int uint; 11 | typedef unsigned long siz; 12 | typedef unsigned char byte; 13 | typedef double* BB; 14 | typedef struct { siz h, w, m; uint *cnts; } RLE; 15 | 16 | // Initialize/destroy RLE. 17 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 18 | void rleFree( RLE *R ); 19 | 20 | // Initialize/destroy RLE array. 21 | void rlesInit( RLE **R, siz n ); 22 | void rlesFree( RLE **R, siz n ); 23 | 24 | // Encode binary masks using RLE. 25 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 26 | 27 | // Decode binary masks encoded via RLE. 28 | void rleDecode( const RLE *R, byte *mask, siz n ); 29 | 30 | // Compute union or intersection of encoded masks. 31 | void rleMerge( const RLE *R, RLE *M, siz n, bool intersect ); 32 | 33 | // Compute area of encoded masks. 34 | void rleArea( const RLE *R, siz n, uint *a ); 35 | 36 | // Compute intersection over union between masks. 37 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 38 | 39 | // Compute intersection over union between bounding boxes. 40 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 41 | 42 | // Get bounding boxes surrounding encoded masks. 43 | void rleToBbox( const RLE *R, BB bb, siz n ); 44 | 45 | // Convert bounding boxes to encoded masks. 46 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 47 | 48 | // Convert polygon to encoded mask. 49 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 50 | 51 | // Get compressed string representation of encoded mask. 52 | char* rleToString( const RLE *R ); 53 | 54 | // Convert from compressed string representation of encoded mask.
55 | void rleFrString( RLE *R, char *s, siz h, siz w ); 56 | -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Xinlei Chen 6 | # -------------------------------------------------------- 7 | 8 | """Compute minibatch blobs for training a Fast R-CNN network.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | import numpy as np 14 | # import numpy.random as npr 15 | from scipy.misc import imread 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | 20 | def get_minibatch(roidb, num_classes, random_scale_inds): 21 | """Given a roidb, construct a minibatch sampled from it.""" 22 | num_images = len(roidb) 23 | # Sample random scales to use for each image in this batch 24 | # random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 25 | # size=num_images) 26 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 27 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 28 | format(num_images, cfg.TRAIN.BATCH_SIZE) 29 | 30 | # Get the input image blob, formatted for caffe 31 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 32 | 33 | blobs = {'data': im_blob} 34 | 35 | assert len(im_scales) == 1, "Single batch only" 36 | assert len(roidb) == 1, "Single batch only" 37 | 38 | # gt boxes: (x1, y1, x2, y2, cls) 39 | if cfg.TRAIN.USE_ALL_GT: 40 | # Include all ground truth boxes 41 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 42 | else: 43 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' (parentheses required: '&' binds tighter than '!=') 44 | gt_inds = np.where((roidb[0]['gt_classes'] != 0) & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 45 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 46 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 47 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 48 | blobs['gt_boxes'] = gt_boxes 49 | blobs['im_info'] = np.array( 50 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 51 | dtype=np.float32) 52 | 53 | blobs['img_id'] = roidb[0]['img_id'] 54 | 55 | return blobs 56 | 57 | def _get_image_blob(roidb, scale_inds): 58 | """Builds an input blob from the images in the roidb at the specified 59 | scales.
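Each image is rescaled by prep_im_for_blob so that its shorter side matches the sampled TRAIN.SCALES entry; for example, a 375x500 image with target size 600 gets im_scale 1.6 and becomes 600x800.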
60 | """ 61 | num_images = len(roidb) 62 | 63 | processed_ims = [] 64 | im_scales = [] 65 | for i in range(num_images): 66 | #im = cv2.imread(roidb[i]['image']) # BGR 67 | im = imread(roidb[i]['image']) # RGB 68 | 69 | # tile channels for 1-channel images 70 | if len(im.shape) == 2: 71 | im = im[:, :, np.newaxis] 72 | im = np.concatenate((im, im, im), axis=2) 73 | 74 | # drop the last channel for 4-channel images 75 | if im.shape[-1] == 4: 76 | im = im[:, :, :-1] 77 | 78 | # rgb -> bgr 79 | im = im[:, :, ::-1] 80 | 81 | if roidb[i]['flipped']: 82 | im = im[:, ::-1, :] 83 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 84 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE) 85 | im_scales.append(im_scale) 86 | processed_ims.append(im) 87 | 88 | # Create a blob to hold the input images 89 | blob = im_list_to_blob(processed_ims) 90 | 91 | return blob, im_scales 92 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import os 10 | from os.path import join as pjoin 11 | import numpy as np 12 | from distutils.core import setup 13 | from distutils.extension import Extension 14 | from Cython.Distutils import build_ext 15 | 16 | 17 | def find_in_path(name, path): 18 | "Find a file in a search path" 19 | # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 20 | for dir in path.split(os.pathsep): 21 | binpath = pjoin(dir, name) 22 | if os.path.exists(binpath): 23 | return os.path.abspath(binpath) 24 | return None 25 | 26 | 27 | # def locate_cuda(): 28 | # """Locate the CUDA environment on the system 29 | # 30 | # Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 31 | # and values giving the absolute path to each directory. 32 | # 33 | # Starts by looking for the CUDAHOME env variable. If not found, everything 34 | # is based on finding 'nvcc' in the PATH. 35 | # """ 36 | # 37 | # # first check if the CUDAHOME env variable is in use 38 | # if 'CUDAHOME' in os.environ: 39 | # home = os.environ['CUDAHOME'] 40 | # nvcc = pjoin(home, 'bin', 'nvcc') 41 | # else: 42 | # # otherwise, search the PATH for NVCC 43 | # default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 44 | # nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 45 | # if nvcc is None: 46 | # raise EnvironmentError('The nvcc binary could not be ' 47 | # 'located in your $PATH. Either add it to your path, or set $CUDAHOME') 48 | # home = os.path.dirname(os.path.dirname(nvcc)) 49 | # 50 | # cudaconfig = {'home': home, 'nvcc': nvcc, 51 | # 'include': pjoin(home, 'include'), 52 | # 'lib64': pjoin(home, 'lib64')} 53 | # for k, v in cudaconfig.iteritems(): 54 | # if not os.path.exists(v): 55 | # raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 56 | # 57 | # return cudaconfig 58 | 59 | 60 | # CUDA = locate_cuda() 61 | 62 | # Obtain the numpy include directory. This logic works across numpy versions. 
63 | try: 64 | numpy_include = np.get_include() 65 | except AttributeError: 66 | numpy_include = np.get_numpy_include() 67 | 68 | 69 | def customize_compiler_for_nvcc(self): 70 | """inject deep into distutils to customize how the dispatch 71 | to gcc/nvcc works. 72 | 73 | If you subclass UnixCCompiler, it's not trivial to get your subclass 74 | injected in, and still have the right customizations (i.e. 75 | distutils.sysconfig.customize_compiler) run on it. So instead of going 76 | the OO route, I have this. Note, it's kind of like a weird functional 77 | subclassing going on.""" 78 | 79 | # tell the compiler it can process .cu 80 | self.src_extensions.append('.cu') 81 | 82 | # save references to the default compiler_so and _compile methods 83 | default_compiler_so = self.compiler_so 84 | super = self._compile 85 | 86 | # now redefine the _compile method. This gets executed for each 87 | # object but distutils doesn't have the ability to change compilers 88 | # based on source extension: we add it. 89 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 90 | print(extra_postargs) 91 | if os.path.splitext(src)[1] == '.cu': 92 | # use the cuda for .cu files 93 | self.set_executable('compiler_so', CUDA['nvcc']) 94 | # use only a subset of the extra_postargs, which are 1-1 translated 95 | # from the extra_compile_args in the Extension class 96 | postargs = extra_postargs['nvcc'] 97 | else: 98 | postargs = extra_postargs['gcc'] 99 | 100 | super(obj, src, ext, cc_args, postargs, pp_opts) 101 | # reset the default compiler_so, which we might have changed for cuda 102 | self.compiler_so = default_compiler_so 103 | 104 | # inject our redefined _compile method into the class 105 | self._compile = _compile 106 | 107 | 108 | # run the customize_compiler 109 | class custom_build_ext(build_ext): 110 | def build_extensions(self): 111 | customize_compiler_for_nvcc(self.compiler) 112 | build_ext.build_extensions(self) 113 | 114 | 115 | ext_modules = [ 116 | Extension( 117 | "model.utils.cython_bbox", 118 | ["model/utils/bbox.pyx"], 119 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 120 | include_dirs=[numpy_include] 121 | ), 122 | Extension( 123 | 'pycocotools._mask', 124 | sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'], 125 | include_dirs=[numpy_include, 'pycocotools'], 126 | extra_compile_args={ 127 | 'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']}, 128 | ), 129 | ] 130 | 131 | setup( 132 | name='faster_rcnn', 133 | ext_modules=ext_modules, 134 | # inject our custom trigger 135 | cmdclass={'build_ext': custom_build_ext}, 136 | ) 137 | -------------------------------------------------------------------------------- /run/finetune_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 10 30 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset coco \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 0 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_coco_TFA.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 30 7 | do 8 | # few-shot fine-tuning 9 | 
CUDA_VISIBLE_DEVICES=3 python train.py --dataset coco \ 10 | --epochs 30 --bs 4 --nw 8 --log_dir checkpoint --save_dir $base_dir \ 11 | --r True --checksession 200 --checkepoch 20 --meta_type 0 --shots $j --phase 2 \ 12 | --meta_train True --meta_loss True --TFA True 13 | 14 | CUDA_VISIBLE_DEVICES=3 python test.py --dataset coco \ 15 | --load_dir $base_dir --meta_type 0 \ 16 | --checksession $j --checkepoch 29 --shots $j \ 17 | --phase 2 --meta_test True --meta_loss True 18 | done -------------------------------------------------------------------------------- /run/finetune_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 1 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_voc_second.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 2 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/finetune_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # few-shot fine-tuning 9 | python train.py --dataset pascal_voc_0712 \ 10 | --epochs 30 --bs 4 --nw 8 \ 11 | --log_dir checkpoint --save_dir $base_dir \ 12 | --r True --checksession 200 --checkepoch 20 \ 13 | --meta_type 3 --shots $j --phase 2 --meta_train True --meta_loss True 14 | done 15 | -------------------------------------------------------------------------------- /run/test_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # number of shots 6 | for j in 10 30 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset coco \ 10 | --load_dir $base_dir --meta_type 0 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done -------------------------------------------------------------------------------- /run/test_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 1 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/test_voc_second.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 2 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/test_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # number of shots 6 | for j in 1 2 3 5 10 7 | do 8 | # testing on base and novel class 9 | python test.py --dataset pascal_voc_0712 \ 10 | --load_dir $base_dir --meta_type 3 \ 11 | --checksession $j --checkepoch 29 --shots $j \ 12 | --phase 2 --meta_test True --meta_loss True 13 | done 14 | -------------------------------------------------------------------------------- /run/train_coco.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/COCO" 4 | 5 | # base class training 6 | python train.py --dataset coco \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 0 --meta_train True --meta_loss True 10 | -------------------------------------------------------------------------------- /run/train_voc_first.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_first" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 1 --meta_train True --meta_loss True -------------------------------------------------------------------------------- /run/train_voc_second.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_second" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 2 --meta_train True --meta_loss True -------------------------------------------------------------------------------- /run/train_voc_third.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | base_dir="save_models/VOC_third" 4 | 5 | # base class training 6 | python train.py --dataset pascal_voc_0712 \ 7 | --epochs 21 --bs 4 --nw 8 \ 8 | --log_dir checkpoint --save_dir $base_dir \ 9 | --meta_type 3 --meta_train True --meta_loss True 10 | --------------------------------------------------------------------------------
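Taken together, the run/ scripts form a three-stage protocol per split: base-class training, few-shot fine-tuning over the listed shot counts, and evaluation of the resulting checkpoints. For the first VOC split, that means running bash run/train_voc_first.sh (base training, 21 epochs, meta_type 1), then bash run/finetune_voc_first.sh (fine-tuning for 1/2/3/5/10 shots with --phase 2), then bash run/test_voc_first.sh (evaluation of each shot setting); the COCO and other VOC splits follow the same pattern with their corresponding scripts.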