├── lib
    ├── model
    │   ├── __init__.py
    │   ├── fpn
    │   │   └── __init__.py
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── nms
    │   │   │   │   └── __init__.py
    │   │   ├── .gitignore
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── nms_cuda_kernel.h
    │   │   │   ├── nms_cuda.h
    │   │   │   ├── nms_cuda.c
    │   │   │   └── nms_cuda_kernel.cu
    │   │   ├── nms_gpu.py
    │   │   ├── build.py
    │   │   ├── nms_wrapper.py
    │   │   ├── nms_kernel.cu
    │   │   └── cpu_nms.pyx
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── rpn_fpn.py
    │   │   ├── proposal_layer_fpn.py
    │   │   ├── generate_anchors.py
    │   │   ├── anchor_target_layer_fpn.py
    │   │   └── bbox_transform.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── .gitignore
    │   │   ├── summary.py
    │   │   ├── blob.py
    │   │   ├── logger.py
    │   │   └── bbox.pyx
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align
    │   │   │   │   └── __init__.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── roi_align_cuda.h
    │   │   │   ├── roi_align_kernel.h
    │   │   │   ├── roi_align_cuda.c
    │   │   │   └── roi_align_kernel.cu
    │   │   └── build.py
    │   ├── roi_crop
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── roi_crop
    │   │   │   │   └── __init__.py
    │   │   │   └── crop_resize
    │   │   │   │   └── __init__.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_crop.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   ├── roi_crop.py
    │   │   │   ├── crop_resize.py
    │   │   │   └── gridgen.py
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── roi_crop_cuda.h
    │   │   │   ├── roi_crop.h
    │   │   │   ├── roi_crop_cuda_kernel.h
    │   │   │   └── roi_crop_cuda.c
    │   │   └── build.py
    │   └── roi_pooling
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │       ├── __init__.py
    │   │       └── roi_pooling
    │   │       │   └── __init__.py
    │   │   ├── functions
    │   │       ├── __init__.py
    │   │       └── roi_pool.py
    │   │   ├── modules
    │   │       ├── __init__.py
    │   │       ├── roi_pool.py
    │   │       └── roi_pool_py.py
    │   │   ├── src
    │   │       ├── roi_pooling.h
    │   │       ├── roi_pooling_cuda.h
    │   │       ├── roi_pooling_kernel.h
    │   │       ├── roi_pooling_cuda.c
    │   │       └── roi_pooling.c
    │   │   └── build.py
    ├── pycocotools
    │   ├── __init__.py
    │   ├── maskApi.h
    │   ├── mask.py
    │   └── maskApi.c
    ├── datasets
    │   ├── __init__.py
    │   ├── VOCdevkit-matlab-wrapper
    │   │   ├── xVOCap.m
    │   │   ├── get_voc_opts.m
    │   │   └── voc_eval.m
    │   ├── ds_utils.py
    │   ├── tools
    │   │   └── mcg_munge.py
    │   ├── factory.py
    │   ├── vg_eval.py
    │   ├── voc_eval.py
    │   ├── imagenet.py
    │   └── imdb.py
    ├── roi_data_layer
    │   ├── __init__.py
    │   ├── minibatch.py
    │   ├── roidb.py
    │   └── roibatchLoader.py
    ├── make.sh
    └── setup.py
├── .gitignore
├── demo_images
    ├── 000005.jpg
    ├── 000006.jpg
    └── 000009.jpg
├── requirements.txt
├── cfgs
    ├── vgg16.yml
    ├── res50.yml
    ├── res101.yml
    ├── detnet59.yml
    ├── res101_ls.yml
    └── detnet59_ls.yml
├── _init_paths.py
├── LICENSE
└── README.md


/lib/model/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/fpn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/lib/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | *.pyc
3 | *.so
4 | *.a
5 | .idea/
6 | 


--------------------------------------------------------------------------------
/demo_images/000005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruoqianguo/DetNet_pytorch/HEAD/demo_images/000005.jpg


--------------------------------------------------------------------------------
/demo_images/000006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruoqianguo/DetNet_pytorch/HEAD/demo_images/000006.jpg


--------------------------------------------------------------------------------
/demo_images/000009.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ruoqianguo/DetNet_pytorch/HEAD/demo_images/000009.jpg


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cython
2 | cffi
3 | opencv-python
4 | scipy
5 | easydict
6 | matplotlib
7 | pyyaml
8 | tensorboardX


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
// Non-CUDA RoI-pooling forward entry point; all tensor arguments are
// THFloatTensor. roi_pooling/build.py lists this header in `headers`.
// NOTE(review): the meaning of the int return value is not visible in this
// header — confirm against roi_pooling.c.
int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
                        THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling stnm kernels by nvcc..."
 7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
// Declares the NMS kernel entry point with C linkage when this header is
// compiled as C++, so the same symbol can be referenced from nms_cuda.c.
#ifdef __cplusplus
extern "C" {
#endif

// nms_cuda.c calls this with the data pointers of its keep_out, num_out and
// boxes tensors, the boxes tensor's size[0] as boxes_num, its size[1] as
// boxes_dim, and the overlap threshold.
void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
          int boxes_dim, float nms_overlap_thresh);

#ifdef __cplusplus
}
#endif
11 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
// Commented-out alternative signature that used THCudaTensor for every
// argument:
// int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
//             THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);

// Active entry point. nms_gpu.py invokes it as
// nms.nms_cuda(keep, dets, num_out, thresh): keep_out and num_out are int
// CUDA tensors, boxes_host is a float CUDA tensor, and the threshold is a
// plain float.
int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
             THCudaIntTensor *num_out, float nms_overlap_thresh);
6 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | from ._ext import nms
 4 | import pdb
 5 | 
 6 | def nms_gpu(dets, thresh):
 7 | 	keep = dets.new(dets.size(0), 1).zero_().int()
 8 | 	num_out = dets.new(1).zero_().int()
 9 | 	nms.nms_cuda(keep, dets, num_out, thresh)
10 | 	keep = keep[:num_out[0]]
11 | 	return keep
12 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m:
--------------------------------------------------------------------------------
 1 | function ap = xVOCap(rec,prec)
 2 | % From the PASCAL VOC 2011 devkit
 3 | 
 4 | mrec=[0 ; rec ; 1];
 5 | mpre=[0 ; prec ; 0];
 6 | for i=numel(mpre)-1:-1:1
 7 |     mpre(i)=max(mpre(i),mpre(i+1));
 8 | end
 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
11 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m:
--------------------------------------------------------------------------------
function VOCopts = get_voc_opts(path)
% GET_VOC_OPTS  Run VOCinit from the directory PATH and return VOCopts.
%   Temporarily changes into PATH and adds its 'VOCcode' subdirectory to the
%   MATLAB search path, runs VOCinit, then removes the search-path entry and
%   restores the original working directory.
%   NOTE(review): VOCopts is never assigned in this file; presumably the
%   VOCinit script defines it in this function's workspace — confirm.

tmp = pwd;  % remember the caller's working directory
cd(path);
try
  addpath('VOCcode');
  VOCinit;
catch
  % Any failure inside addpath/VOCinit is reported as a missing VOCcode
  % directory; the search path and working directory are restored first.
  rmpath('VOCcode');
  cd(tmp);
  error(sprintf('VOCcode directory not found under %s', path));
end
% Success path: undo the temporary search-path entry and directory change.
rmpath('VOCcode');
cd(tmp);
15 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 | 
4 | class _RoICrop(Module):
5 |     def __init__(self, layout = 'BHWD'):
6 |         super(_RoICrop, self).__init__()
7 |     def forward(self, input1, input2):
8 |         return RoICropFunction()(input1, input2)
9 | 


--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: vgg16
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   BATCH_SIZE: 256
10 |   LEARNING_RATE: 0.01
11 | TEST:
12 |   HAS_RPN: True
13 | POOLING_MODE: align
14 | CROP_RESIZE_WITH_MAX_POOL: False
15 | 


--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/_init_paths.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | def add_path(path):
 5 |     if path not in sys.path:
 6 |         sys.path.insert(0, path)
 7 | 
 8 | this_dir = osp.dirname(__file__)
 9 | 
10 | # Add lib to PYTHONPATH
11 | lib_path = osp.join(this_dir, 'lib')
12 | add_path(lib_path)
13 | 
14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI')
15 | add_path(coco_path)
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_crop import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_align import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._crop_resize import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
// CUDA RoI-align entry points; all tensor arguments are THCudaTensor.
// roi_align/build.py adds this header to `headers` when CUDA is available.
// NOTE(review): the meaning of the int return values is not visible in this
// header — confirm against roi_align_cuda.c.

// Forward pass. Going by the parameter names, `features` and `rois` are the
// inputs and `output` receives the result — confirm in the implementation.
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

// Backward pass. Going by the parameter names, `top_grad` is the incoming
// gradient and `bottom_grad` receives the result — confirm in the
// implementation.
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res50
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   # IMS_PER_BATCH: 1
 5 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 |   RPN_POSITIVE_OVERLAP: 0.7
 7 |   RPN_BATCHSIZE: 256
 8 |   PROPOSAL_METHOD: gt
 9 |   BG_THRESH_LO: 0.0
10 |   DISPLAY: 20
11 |   BATCH_SIZE: 256
12 |   WEIGHT_DECAY: 0.0001
13 |   DOUBLE_BIAS: False
14 |   SNAPSHOT_PREFIX: res50_faster_rcnn
15 | TEST:
16 |   HAS_RPN: True
17 | POOLING_MODE: crop
18 | 


--------------------------------------------------------------------------------
/cfgs/res101.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   DOUBLE_BIAS: False
13 |   LEARNING_RATE: 0.001
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_SIZE: 7
17 | POOLING_MODE: align
18 | CROP_RESIZE_WITH_MAX_POOL: False
19 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
// CUDA RoI-pooling entry points. Feature/RoI/gradient tensors are
// THCudaTensor; `argmax` is a THCudaIntTensor shared by both passes.
// roi_pooling/build.py adds this header to `headers` when CUDA is available.
// NOTE(review): the meaning of the int return values is not visible in this
// header — confirm against roi_pooling_cuda.c.

// Forward pass (takes `features`, `rois`, `output` and `argmax`).
int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);

// Backward pass (takes `top_grad`, `rois`, `bottom_grad` and `argmax`).
int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/cfgs/detnet59.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   DOUBLE_BIAS: False
13 |   LEARNING_RATE: 0.001
14 |   ASPECT_CROPPING: True
15 | TEST:
16 |   HAS_RPN: True
17 | POOLING_SIZE: 14
18 | POOLING_MODE: align
19 | CROP_RESIZE_WITH_MAX_POOL: False
20 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
// CUDA bilinear-sampler entry points; all tensor arguments are THCudaTensor.
// roi_crop/build.py adds this header to `headers` when CUDA is available.
//
// Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
// we assume BHWD format in inputImages
// we assume BHW(YX) format on grids

// Forward pass (takes `inputImages`, `grids` and `output`).
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);

// Backward pass (takes the forward inputs plus `gradInputImages`,
// `gradGrids` and `gradOutput`).
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 | 


--------------------------------------------------------------------------------
/lib/model/utils/summary.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | def write_scalars(writer, scalars, names, n_iter, tag=None):
 4 |     for i, scalar, in enumerate(scalars):
 5 |         if tag is not None:
 6 |             name = os.path.join(tag, names[i])
 7 |         else:
 8 |             name = names[i]
 9 |         writer.add_scalar(name, scalar, n_iter)
10 | 
def write_hist_parameters(writer, net, n_iter):
    """Log a histogram of every named parameter of ``net``.

    Each parameter is cloned, moved to the CPU and converted to a NumPy array
    before being passed to ``writer.add_histogram(name, values, n_iter)``.
    """
    for param_name, param in net.named_parameters():
        cpu_copy = param.clone().cpu()
        values = cpu_copy.data.numpy()
        writer.add_histogram(param_name, values, n_iter)
14 | 
15 | 


--------------------------------------------------------------------------------
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   SCALES: [800]
13 |   DOUBLE_BIAS: False
14 |   LEARNING_RATE: 0.001
15 | TEST:
16 |   HAS_RPN: True
17 |   SCALES: [800]
18 |   MAX_SIZE: 1200
19 |   RPN_POST_NMS_TOP_N: 1000
20 | POOLING_SIZE: 7
21 | POOLING_MODE: align
22 | CROP_RESIZE_WITH_MAX_POOL: False
23 | 


--------------------------------------------------------------------------------
/cfgs/detnet59_ls.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   SCALES: [800]
13 |   DOUBLE_BIAS: False
14 |   LEARNING_RATE: 0.001
15 |   ASPECT_CROPPING: True
16 | TEST:
17 |   HAS_RPN: True
18 |   SCALES: [800]
19 |   MAX_SIZE: 1200
20 |   RPN_POST_NMS_TOP_N: 1000
21 | POOLING_SIZE: 14
22 | POOLING_MODE: align
23 | CROP_RESIZE_WITH_MAX_POOL: False
24 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_pool import RoIPoolFunction
 3 | 
 4 | 
 5 | class _RoIPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 7 |         super(_RoIPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 | 
13 |     def forward(self, features, rois, scale):
14 |         return RoIPoolFunction(self.pooled_height, self.pooled_width, scale)(features, rois)
15 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <stdio.h>
 3 | #include "nms_cuda_kernel.h"
 4 | 
 5 | // this symbol will be resolved automatically from PyTorch libs
 6 | extern THCState *state;
 7 | 
 8 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
 9 | 		     THCudaIntTensor *num_out, float nms_overlap_thresh) {
10 | 
11 | 	nms_cuda_compute(THCudaIntTensor_data(state, keep_out), 
12 | 		         THCudaIntTensor_data(state, num_out), 
13 |       	                 THCudaTensor_data(state, boxes_host), 
14 | 		         boxes_host->size[0], 
15 | 		         boxes_host->size[1],
16 | 		         nms_overlap_thresh);
17 | 
18 | 	return 1;
19 | }
20 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
// Non-CUDA bilinear-sampler entry points; all tensor arguments are
// THFloatTensor. roi_crop/build.py lists this header in `headers`.
// Two families are declared, named after the BHWD and BCHW layouts; each has
// an updateOutput (forward) and an updateGradInput (backward) function.
// NOTE(review): the meaning of the int return values is not visible in this
// header — confirm against roi_crop.c.

// BHWD variant.
int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);



// BCHW variant.
int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);

int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
// Declarations of the RoI-pooling kernel launchers, exported with C linkage
// when this header is compiled as C++.
// The spelling "Laucher" is part of the symbol names; renaming it here would
// also require renaming the definitions and every caller.
#ifndef _ROI_POOLING_KERNEL
#define _ROI_POOLING_KERNEL

#ifdef __cplusplus
extern "C" {
#endif

// Forward launcher: takes const input pointers (bottom_data, bottom_rois),
// non-const output pointers (top_data, argmax_data) and the stream to use.
int ROIPoolForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* top_data, int* argmax_data, cudaStream_t stream);


// Backward launcher: takes const top_diff, bottom_rois and argmax_data, a
// non-const bottom_diff output pointer, and the stream to use. Unlike the
// forward launcher it also receives batch_size.
int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* bottom_diff, const int* argmax_data, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
25 | 
26 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/roi_pooling.c']
 7 | headers = ['src/roi_pooling.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/roi_pooling_cuda.c']
14 |     headers += ['src/roi_pooling_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/roi_pooling.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.roi_pooling',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects
31 | )
32 | 
33 | if __name__ == '__main__':
34 |     ffi.build()
35 | 


--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | #this_file = os.path.dirname(__file__)
 6 | 
 7 | sources = []
 8 | headers = []
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/nms_cuda.c']
15 |     headers += ['src/nms_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/nms_cuda_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | print(extra_objects)
24 | 
25 | ffi = create_extension(
26 |     '_ext.nms',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | # sources = ['src/roi_align.c']
 6 | # headers = ['src/roi_align.h']
 7 | sources = []
 8 | headers = []
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/roi_align_cuda.c']
15 |     headers += ['src/roi_align_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_align_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | 
24 | ffi = create_extension(
25 |     '_ext.roi_align',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | #this_file = os.path.dirname(__file__)
 6 | 
 7 | sources = ['src/roi_crop.c']
 8 | headers = ['src/roi_crop.h']
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/roi_crop_cuda.c']
15 |     headers += ['src/roi_crop_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | 
24 | ffi = create_extension(
25 |     '_ext.roi_crop',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
# functions/roi_crop.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | import pdb
 6 | 
 7 | class RoICropFunction(Function):
 8 |     def forward(self, input1, input2):
 9 |         self.input1 = input1.clone()
10 |         self.input2 = input2.clone()
11 |         output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_()
12 |         assert output.get_device() == input1.get_device(), "output and input1 must on the same device"
13 |         assert output.get_device() == input2.get_device(), "output and input2 must on the same device"
14 |         roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
15 |         return output
16 | 
17 |     def backward(self, grad_output):
18 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
19 |         grad_input2 = self.input2.new(self.input2.size()).zero_()
20 |         roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
21 |         return grad_input1, grad_input2
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Jianwei Yang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | import torch
 8 | from model.utils.config import cfg
 9 | from model.nms.nms_gpu import nms_gpu
10 | from model.nms.cpu_nms import cpu_soft_nms
11 | import numpy as np
12 | 
13 | 
def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1):
    """Run `cpu_soft_nms` on `dets` and return its result as a NumPy array.

    `dets` is converted to a contiguous float32 array; `sigma`, `Nt` and
    `threshold` are passed as float32 scalars and `method` as a uint8 scalar.
    """
    dets_f32 = np.ascontiguousarray(dets, dtype=np.float32)
    kept = cpu_soft_nms(dets_f32,
                        np.float32(sigma),
                        np.float32(Nt),
                        np.float32(threshold),
                        np.uint8(method))
    return np.array(kept)
21 | 
22 | 
def nms(dets, thresh, force_cpu=False):
    """Run non-maximum suppression on `dets` through `nms_gpu`.

    Returns an empty list when `dets` has no rows. `force_cpu` is accepted
    but not consulted: every non-empty input goes to the GPU implementation.
    """
    num_dets = dets.shape[0]
    if num_dets != 0:
        # The numpy-era code called gpu_nms(dets, thresh, device_id=cfg.GPU_ID);
        # the PyTorch port uses nms_gpu instead.
        return nms_gpu(dets, thresh)
    return []
31 | 


--------------------------------------------------------------------------------
/lib/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | python setup.py build_ext --inplace
 6 | rm -rf build
 7 | 
 8 | # compile NMS
 9 | cd model/nms/src
10 | echo "Compiling nms kernels by nvcc..."
11 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu \
12 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
13 | cd ../
14 | python build.py
15 | 
16 | # compile roi_pooling
17 | cd ../../
18 | cd model/roi_pooling/src
19 | echo "Compiling roi pooling kernels by nvcc..."
20 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu \
21 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
22 | cd ../
23 | python build.py
24 | 
25 | # compile roi_align
26 | cd ../../
27 | cd model/roi_align/src
28 | echo "Compiling roi align kernels by nvcc..."
29 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu \
30 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
31 | cd ../
32 | python build.py
33 | 
34 | # compile roi_crop
35 | cd ../../
36 | cd model/roi_crop/src
37 | echo "Compiling roi crop kernels by nvcc..."
38 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu \
39 | 	 -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
40 | cd ../
41 | python build.py
42 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
/* Declarations of the RoI Align CUDA kernels and their launcher functions.
 * Only prototypes live here; the definitions are not part of this header. */
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

/* C linkage so the launchers can be referenced from C translation units. */
#ifdef __cplusplus
extern "C" {
#endif

/* Forward kernel. Reads from bottom_data and bottom_rois (both const) and
 * writes to top_data. nthreads is presumably the number of output elements
 * to process -- confirm against the kernel definition. */
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

/* Launcher for the forward pass on the given CUDA stream. Returns an int;
 * the meaning of the return value is not specified in this header. */
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

/* Backward kernel. Reads from top_diff and bottom_rois (both const) and
 * writes to bottom_diff. */
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

/* Launcher for the backward pass on the given CUDA stream. Unlike the
 * forward launcher it also takes batch_size. Returns an int; the meaning of
 * the return value is not specified in this header. */
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
34 | 
35 | 


--------------------------------------------------------------------------------
/lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m:
--------------------------------------------------------------------------------
function res = voc_eval(path, comp_id, test_set, output_dir)
% VOC_EVAL  Evaluate every class in VOCopts.classes and print a summary.
%   path       - passed to get_voc_opts to obtain the VOCopts structure
%   comp_id    - forwarded unchanged to voc_eval_cls
%   test_set   - stored in VOCopts.testset before evaluation
%   output_dir - forwarded unchanged to voc_eval_cls
%   res        - struct array with one voc_eval_cls result per class

VOCopts = get_voc_opts(path);
VOCopts.testset = test_set;

% res grows by one element per class.
for i = 1:length(VOCopts.classes)
  cls = VOCopts.classes{i};
  res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir);
end

% Print each class AP and then the mean AP, all scaled by 100.
fprintf('\n~~~~~~~~~~~~~~~~~~~~\n');
fprintf('Results:\n');
aps = [res(:).ap]';
fprintf('%.1f\n', aps * 100);
fprintf('%.1f\n', mean(aps) * 100);
fprintf('~~~~~~~~~~~~~~~~~~~~\n');
17 | 
function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir)
% VOC_EVAL_CLS  Evaluate one class and save its precision/recall results.
%   Returns a struct with fields recall, prec, ap and ap_auc. When the
%   evaluation is skipped (see do_eval below) recall and prec stay empty and
%   both AP values stay 0.

test_set = VOCopts.testset;
% Characters from position 4 onward of the dataset name
% (presumably the year, e.g. 'VOC2007' -> '2007' -- confirm).
year = VOCopts.dataset(4:end);

% VOCcode is put on the path for the duration of this call; it is removed
% again at the end of the function.
addpath(fullfile(VOCopts.datadir, 'VOCcode'));

% NOTE(review): res_fn is computed but not used later in this function.
res_fn = sprintf(VOCopts.detrespath, comp_id, cls);

recall = [];
prec = [];
ap = 0;
ap_auc = 0;

% Evaluate when the year is 2007 or earlier, or when the set is not 'test'.
do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test');
if do_eval
  % Bug in VOCevaldet requires that tic has been called first
  tic;
  [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true);
  ap_auc = xVOCap(recall, prec);

  % force plot limits
  ylim([0 1]);
  xlim([0 1]);

  % Save the current figure as <output_dir>/<cls>_pr.jpg.
  print(gcf, '-djpeg', '-r0', ...
        [output_dir '/' cls '_pr.jpg']);
end
fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc);

res.recall = recall;
res.prec = prec;
res.ap = ap;
res.ap_auc = ap_auc;

% Persist the results as <output_dir>/<cls>_pr.mat.
save([output_dir '/' cls '_pr.mat'], ...
     'res', 'recall', 'prec', 'ap', 'ap_auc');

rmpath(fullfile(VOCopts.datadir, 'VOCcode'));
57 | 


--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | 
12 | 
def unique_boxes(boxes, scale=1.0):
  """Return the sorted indices of the first occurrence of each distinct box.

  Boxes are compared after multiplying by `scale` and rounding; each rounded
  row is collapsed to a single key by a weighted sum of its four columns.
  """
  weights = np.array([1, 1e3, 1e6, 1e9])
  rounded = np.round(boxes * scale)
  keys = rounded.dot(weights)
  first_seen = np.unique(keys, return_index=True)[1]
  return np.sort(first_seen)
19 | 
20 | 
def xywh_to_xyxy(boxes):
  """Convert boxes from [x y w h] to [x1 y1 x2 y2] (x2 = x + w - 1, y2 = y + h - 1)."""
  top_left = boxes[:, 0:2]
  sizes = boxes[:, 2:4]
  bottom_right = top_left + sizes - 1
  return np.hstack((top_left, bottom_right))
24 | 
25 | 
def xyxy_to_xywh(boxes):
  """Convert boxes from [x1 y1 x2 y2] to [x y w h] (w = x2 - x1 + 1, h = y2 - y1 + 1)."""
  top_left = boxes[:, 0:2]
  bottom_right = boxes[:, 2:4]
  sizes = bottom_right - top_left + 1
  return np.hstack((top_left, sizes))
29 | 
30 | 
def validate_boxes(boxes, width=0, height=0):
  """Assert that every [x1 y1 x2 y2] box is well-formed and inside the image.

  Checks, in order: x1 >= 0, y1 >= 0, x2 >= x1, y2 >= y1, x2 < width and
  y2 < height. Raises AssertionError on the first check that fails.
  """
  x1, y1, x2, y2 = (boxes[:, col] for col in range(4))
  checks = (
      x1 >= 0,
      y1 >= 0,
      x2 >= x1,
      y2 >= y1,
      x2 < width,
      y2 < height,
  )
  for passed in checks:
    assert passed.all()
43 | 
44 | 
def filter_small_boxes(boxes, min_size):
  """Return indices of boxes whose width and height are both >= `min_size`.

  Width and height are computed as x2 - x1 and y2 - y1 for [x1 y1 x2 y2]
  boxes. Both sides use the same inclusive comparison; previously the height
  was tested with a strict `>`, so a box exactly `min_size` tall was dropped
  while one exactly `min_size` wide was kept.
  """
  w = boxes[:, 2] - boxes[:, 0]
  h = boxes[:, 3] - boxes[:, 1]
  keep = np.where((w >= min_size) & (h >= min_size))[0]
  return keep
50 | 


--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | 
 4 | """Hacky tool to convert file system layout of MCG boxes downloaded from
 5 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
 6 | so that it's consistent with those computed by Jan Hosang (see:
 7 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
 8 |   computing/research/object-recognition-and-scene-understanding/how-
 9 |   good-are-detection-proposals-really/)
10 | 
11 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
12 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
13 | """
14 | 
def munge(src_dir):
    """Move every file in `src_dir` into the nested MCG/mat/ layout.

    A file named <base><ext> is moved to
    MCG/mat/<base[:14]>/<base[:22]>/<base><ext>, relative to the current
    working directory. Destination directories are created on demand.
    """
    # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat
    # want:      ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat

    for fn in os.listdir(src_dir):
        base, ext = os.path.splitext(fn)
        # first 14 chars / first 22 chars / all chars + .mat
        # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat
        first = base[:14]
        second = base[:22]
        dst_dir = os.path.join('MCG', 'mat', first, second)
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        # Parenthesised single-argument form prints the same text on
        # Python 2 and is valid syntax on Python 3.
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)
33 | 
if __name__ == '__main__':
    # The single command-line argument is the source directory,
    # e.g. 'MCG-COCO-val2014-boxes'.
    munge(sys.argv[1])
39 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
# functions/crop_resize.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | from cffi import FFI
 6 | ffi = FFI()
 7 | 
 8 | class RoICropFunction(Function):
 9 |     def forward(self, input1, input2):
10 |         self.input1 = input1
11 |         self.input2 = input2
12 |         self.device_c = ffi.new("int *")
13 |         output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
14 |         #print('decice %d' % torch.cuda.current_device())
15 |         if input1.is_cuda:
16 |             self.device = torch.cuda.current_device()
17 |         else:
18 |             self.device = -1
19 |         self.device_c[0] = self.device
20 |         if not input1.is_cuda:
21 |             roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
22 |         else:
23 |             output = output.cuda(self.device)
24 |             roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
25 |         return output
26 | 
27 |     def backward(self, grad_output):
28 |         grad_input1 = torch.zeros(self.input1.size())
29 |         grad_input2 = torch.zeros(self.input2.size())
30 |         #print('backward decice %d' % self.device)
31 |         if not grad_output.is_cuda:
32 |             roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
33 |         else:
34 |             grad_input1 = grad_input1.cuda(self.device)
35 |             grad_input2 = grad_input2.cuda(self.device)
36 |             roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
37 |         return grad_input1, grad_input2
38 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from torch.nn.functional import avg_pool2d, max_pool2d
 3 | from ..functions.roi_align import RoIAlignFunction
 4 | 
 5 | 
 6 | class RoIAlign(Module):
 7 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 8 |         super(RoIAlign, self).__init__()
 9 | 
10 |         self.aligned_width = int(aligned_width)
11 |         self.aligned_height = int(aligned_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois, scale):
15 |         return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 |                                 scale)(features, rois)
17 | 
class RoIAlignAvg(Module):
    """RoI Align followed by a 2x2, stride-1 average pool.

    `RoIAlignFunction` is built with each aligned dimension increased by one
    before pooling. `forward` uses the `scale` passed at call time rather
    than the stored `spatial_scale`.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois, scale):
        """Align at (height + 1, width + 1), then average-pool the result."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 scale)
        aligned = align(features, rois)
        return avg_pool2d(aligned, kernel_size=2, stride=1)
30 | 
class RoIAlignMax(Module):
    """RoI Align followed by a 2x2, stride-1 max pool.

    `RoIAlignFunction` is built with each aligned dimension increased by one
    before pooling. `forward` uses the `scale` passed at call time rather
    than the stored `spatial_scale`.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois, scale):
        """Align at (height + 1, width + 1), then max-pool the result."""
        align = RoIAlignFunction(self.aligned_height + 1,
                                 self.aligned_width + 1,
                                 scale)
        aligned = align(features, rois)
        return max_pool2d(aligned, kernel_size=2, stride=1)
43 | 


--------------------------------------------------------------------------------
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | #from scipy.misc import imread, imresize
12 | import cv2
13 | try:
14 |     xrange  # Python 2
15 | except NameError:
16 |     xrange = range  # Python 3
17 | 
18 | 
def im_list_to_blob(ims):
    """Convert a list of images into a network input.

    Assumes images are already prepared (means subtracted, BGR order, ...).

    Returns a float32 array of shape (len(ims), max_height, max_width, 3).
    Each image is copied into the top-left corner of its slot; the remaining
    area stays zero.
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    # Iterate the images directly instead of indexing through a range.
    for i, im in enumerate(ims):
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im

    return blob
33 | 
def prep_im_for_blob(im, pixel_means, pixel_stds, target_size, max_size):
    """Normalise an image and rescale it for use in a blob.

    The image is converted to float32, divided by 255, shifted by
    `pixel_means`, divided by `pixel_stds`, and resized so that its shorter
    side equals `target_size`.

    `max_size` is accepted but not applied: the cap on the longer side is
    disabled in this implementation.

    Returns:
        (im, im_scale): the resized float32 image and the scale factor used.
    """
    # Always work on a private copy. With copy=False a float32 input would be
    # returned as-is and the in-place arithmetic below would silently modify
    # the caller's array.
    im = im.astype(np.float32, copy=True)
    im /= 255.0
    im -= pixel_means
    im /= pixel_stds

    shorter_side = np.min(im.shape[0:2])
    im_scale = float(target_size) / float(shorter_side)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    return im, im_scale
54 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_pooling
 4 | import pdb
 5 | 
 6 | class RoIPoolFunction(Function):
 7 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale):
 8 |         ctx.pooled_width = pooled_width
 9 |         ctx.pooled_height = pooled_height
10 |         ctx.spatial_scale = spatial_scale
11 |         ctx.feature_size = None
12 | 
13 |     def forward(ctx, features, rois): 
14 |         ctx.feature_size = features.size()           
15 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
16 |         num_rois = rois.size(0)
17 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 |         ctx.rois = rois
20 |         if not features.is_cuda:
21 |             _features = features.permute(0, 2, 3, 1)
22 |             roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 |                                             _features, rois, output)
24 |         else:
25 |             roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 |                                                  features, rois, output, ctx.argmax)
27 | 
28 |         return output
29 | 
30 |     def backward(ctx, grad_output):
31 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
32 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
33 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 | 
35 |         roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 |                                               grad_output, ctx.rois, grad_input, ctx.argmax)
37 | 
38 |         return grad_input, None
39 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_align
 4 | 
 5 | 
 6 | # TODO use save_for_backward instead
 7 | class RoIAlignFunction(Function):
 8 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 9 |         self.aligned_width = int(aligned_width)
10 |         self.aligned_height = int(aligned_height)
11 |         self.spatial_scale = float(spatial_scale)
12 |         self.rois = None
13 |         self.feature_size = None
14 | 
15 |     def forward(self, features, rois):
16 |         self.rois = rois
17 |         self.feature_size = features.size()
18 | 
19 |         batch_size, num_channels, data_height, data_width = features.size()
20 |         num_rois = rois.size(0)
21 | 
22 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
23 |         if features.is_cuda:
24 |             roi_align.roi_align_forward_cuda(self.aligned_height,
25 |                                              self.aligned_width,
26 |                                              self.spatial_scale, features,
27 |                                              rois, output)
28 |         else:
29 |             raise NotImplementedError
30 | 
31 |         return output
32 | 
33 |     def backward(self, grad_output):
34 |         assert(self.feature_size is not None and grad_output.is_cuda)
35 | 
36 |         batch_size, num_channels, data_height, data_width = self.feature_size
37 | 
38 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
39 |                                   data_width).zero_()
40 |         roi_align.roi_align_backward_cuda(self.aligned_height,
41 |                                           self.aligned_width,
42 |                                           self.spatial_scale, grad_output,
43 |                                           self.rois, grad_input)
44 | 
45 |         # print grad_input
46 | 
47 |         return grad_input, None
48 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool_py.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Variable
 4 | import numpy as np
 5 | 
 6 | 
 7 | class RoIPool(nn.Module):
 8 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 9 |         super(RoIPool, self).__init__()
10 |         self.pooled_width = int(pooled_width)
11 |         self.pooled_height = int(pooled_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois):
15 |         batch_size, num_channels, data_height, data_width = features.size()
16 |         num_rois = rois.size()[0]
17 |         outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda()
18 | 
19 |         for roi_ind, roi in enumerate(rois):
20 |             batch_ind = int(roi[0].data[0])
21 |             roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round(
22 |                 roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int)
23 |             roi_width = max(roi_end_w - roi_start_w + 1, 1)
24 |             roi_height = max(roi_end_h - roi_start_h + 1, 1)
25 |             bin_size_w = float(roi_width) / float(self.pooled_width)
26 |             bin_size_h = float(roi_height) / float(self.pooled_height)
27 | 
28 |             for ph in range(self.pooled_height):
29 |                 hstart = int(np.floor(ph * bin_size_h))
30 |                 hend = int(np.ceil((ph + 1) * bin_size_h))
31 |                 hstart = min(data_height, max(0, hstart + roi_start_h))
32 |                 hend = min(data_height, max(0, hend + roi_start_h))
33 |                 for pw in range(self.pooled_width):
34 |                     wstart = int(np.floor(pw * bin_size_w))
35 |                     wend = int(np.ceil((pw + 1) * bin_size_w))
36 |                     wstart = min(data_width, max(0, wstart + roi_start_w))
37 |                     wend = min(data_width, max(0, wend + roi_start_w))
38 | 
39 |                     is_empty = (hend <= hstart) or(wend <= wstart)
40 |                     if is_empty:
41 |                         outputs[roi_ind, :, ph, pw] = 0
42 |                     else:
43 |                         data = features[batch_ind]
44 |                         outputs[roi_ind, :, ph, pw] = torch.max(
45 |                             torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1)
46 | 
47 |         return outputs
48 | 
49 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
# functions/gridgen.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | import numpy as np
 5 | 
 6 | 
 7 | class AffineGridGenFunction(Function):
 8 |     def __init__(self, height, width,lr=1):
 9 |         super(AffineGridGenFunction, self).__init__()
10 |         self.lr = lr
11 |         self.height, self.width = height, width
12 |         self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32)
13 |         self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0)
14 |         self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0)
15 |         # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0)
16 |         # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0)
17 |         self.grid[:,:,2] = np.ones([self.height, width])
18 |         self.grid = torch.from_numpy(self.grid.astype(np.float32))
19 |         #print(self.grid)
20 | 
21 |     def forward(self, input1):
22 |         self.input1 = input1
23 |         output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
24 |         self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
25 |         for i in range(input1.size(0)):
26 |             self.batchgrid[i] = self.grid.astype(self.batchgrid[i])
27 | 
28 |         # if input1.is_cuda:
29 |         #    self.batchgrid = self.batchgrid.cuda()
30 |         #    output = output.cuda()
31 | 
32 |         for i in range(input1.size(0)):
33 |             output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
34 | 
35 |         return output
36 | 
37 |     def backward(self, grad_output):
38 | 
39 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
40 | 
41 |         # if grad_output.is_cuda:
42 |         #    self.batchgrid = self.batchgrid.cuda()
43 |         #    grad_input1 = grad_input1.cuda()
44 | 
45 |         grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3))
46 |         return grad_input1
47 | 


--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.h:
--------------------------------------------------------------------------------
 1 | /**************************************************************************
 2 | * Microsoft COCO Toolbox.      version 2.0
 3 | * Data, paper, and tutorials available at:  http://mscoco.org/
 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 5 | * Licensed under the Simplified BSD License [see coco/license.txt]
 6 | **************************************************************************/
 7 | #pragma once
 8 | 
/* Short aliases used throughout the prototypes below. */
typedef unsigned int uint;
typedef unsigned long siz;   /* used for dimensions and element counts in the prototypes below */
typedef unsigned char byte;  /* used for mask data and the iscrowd flags in the prototypes below */
typedef double* BB;          /* pointer to double data holding bounding boxes; layout is not specified in this header */
/* Run-length encoding record with fields h, w, m and a pointer to uint counts
   (presumably height, width and number of counts -- confirm against maskApi.c). */
typedef struct { siz h, w, m; uint *cnts; } RLE;
14 | 
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 | 
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 | 
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 | 
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 | 
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 | 
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 | 
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 | 
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 | 
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 | 
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 | 
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 | 
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 | 
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 | 
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 | 
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_align_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 | 
16 |     // Number of ROIs
17 |     int num_rois = THCudaTensor_size(state, rois, 0);
18 |     int size_rois = THCudaTensor_size(state, rois, 1);
19 |     if (size_rois != 5)
20 |     {
21 |         return 0;
22 |     }
23 | 
24 |     // data height
25 |     int data_height = THCudaTensor_size(state, features, 2);
26 |     // data width
27 |     int data_width = THCudaTensor_size(state, features, 3);
28 |     // Number of channels
29 |     int num_channels = THCudaTensor_size(state, features, 1);
30 | 
31 |     cudaStream_t stream = THCState_getCurrentStream(state);
32 | 
33 |     ROIAlignForwardLaucher(
34 |         data_flat, spatial_scale, num_rois, data_height,
35 |         data_width, num_channels, aligned_height,
36 |         aligned_width, rois_flat,
37 |         output_flat, stream);
38 | 
39 |     return 1;
40 | }
41 | 
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 |     // Grab the input tensor
46 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 |     float * rois_flat = THCudaTensor_data(state, rois);
48 | 
49 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 | 
51 |     // Number of ROIs
52 |     int num_rois = THCudaTensor_size(state, rois, 0);
53 |     int size_rois = THCudaTensor_size(state, rois, 1);
54 |     if (size_rois != 5)
55 |     {
56 |         return 0;
57 |     }
58 | 
59 |     // batch size
60 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 |     // data height
62 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 |     // data width
64 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 |     // Number of channels
66 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 | 
68 |     cudaStream_t stream = THCState_getCurrentStream(state);
69 |     ROIAlignBackwardLaucher(
70 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 |         data_width, num_channels, aligned_height,
72 |         aligned_width, rois_flat,
73 |         bottom_grad_flat, stream);
74 | 
75 |     return 1;
76 | }
77 | 


--------------------------------------------------------------------------------
/lib/model/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
 2 | import tensorflow as tf
 3 | import numpy as np
 4 | import scipy.misc 
 5 | try:
 6 |     from StringIO import StringIO  # Python 2.7
 7 | except ImportError:
 8 |     from io import BytesIO         # Python 3.x
 9 | 
10 | 
class Logger(object):
    """Thin wrapper around ``tf.summary.FileWriter`` for scalar, image and
    histogram summaries."""

    def __init__(self, log_dir):
        """Create a summary writer logging to log_dir."""
        self.writer = tf.summary.FileWriter(log_dir)

    def scalar_summary(self, tag, value, step):
        """Log a scalar variable.

        tag: name of the summary entry.
        value: scalar stored as the summary's ``simple_value``.
        step: global step passed to the writer.
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
        self.writer.add_summary(summary, step)

    def image_summary(self, tag, images, step):
        """Log a list of images.

        Each image is PNG-encoded and written under the tag ``'<tag>/<i>'``,
        where ``i`` is its position in ``images``.
        """
        img_summaries = []
        for i, img in enumerate(images):
            # Pick whichever in-memory buffer class the module managed to
            # import: StringIO on Python 2.7, BytesIO on Python 3. Only the
            # missing-name case is handled so unrelated errors still surface.
            try:
                s = StringIO()
            except NameError:
                s = BytesIO()
            scipy.misc.toimage(img).save(s, format="png")

            # Create an Image object
            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
                                       height=img.shape[0],
                                       width=img.shape[1])
            # Create a Summary value
            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))

        # Create and write Summary
        summary = tf.Summary(value=img_summaries)
        self.writer.add_summary(summary, step)

    def histo_summary(self, tag, values, step, bins=1000):
        """Log a histogram of the tensor of values.

        tag: name of the summary entry.
        values: array-like of numbers (lists and tuples are accepted too).
        step: global step passed to the writer.
        bins: forwarded to ``np.histogram``.

        The writer is flushed after the summary has been added.
        """
        # Coerce once so that .size and the element-wise square below also
        # work for plain Python sequences, not just ndarrays.
        values = np.asarray(values)

        # Create a histogram using numpy
        counts, bin_edges = np.histogram(values, bins=bins)

        # Fill the fields of the histogram proto
        hist = tf.HistogramProto()
        hist.min = float(np.min(values))
        hist.max = float(np.max(values))
        hist.num = int(values.size)
        hist.sum = float(np.sum(values))
        hist.sum_squares = float(np.sum(values ** 2))

        # Drop the start of the first bin so that the number of bucket
        # limits equals the number of bucket counts.
        bin_edges = bin_edges[1:]

        # Add bin edges and counts
        for edge in bin_edges:
            hist.bucket_limit.append(edge)
        for c in counts:
            hist.bucket.append(c)

        # Create and write Summary
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
        self.writer.add_summary(summary, step)
        self.writer.flush()
72 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
 5 | 
/*
 * Forward entry point of the bilinear sampler (declaration only; the
 * implementation is not in this header).
 *
 * Argument layout, as indicated by the inline comments:
 *   - oc/ow/oh/ob: sizes of the output tensor, taken from dims 3/2/1/0.
 *   - ic/ih/iw/ib: sizes of inputImages, taken from dims 3/1/2/0.
 *   - each float* tensor is followed by four int stride-like values; the
 *     b/c/h/w suffixes presumably mean batch/channel/height/width strides
 *     -- TODO confirm against the .cu implementation.
 *   - stream: CUDA stream passed by the caller.
 * The meaning of the int return value is defined by the implementation.
 */
int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,
                                                 /*output->size[2]*/int ow,
                                                 /*output->size[1]*/int oh,
                                                 /*output->size[0]*/int ob,
                                                 /*THCudaTensor_size(state, inputImages, 3)*/int ic,
                                                 /*THCudaTensor_size(state, inputImages, 1)*/int ih,
                                                 /*THCudaTensor_size(state, inputImages, 2)*/int iw,
                                                 /*THCudaTensor_size(state, inputImages, 0)*/int ib,
                                                 /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
                                                 /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
                                                 /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw,
                                                 /*THCState_getCurrentStream(state)*/cudaStream_t stream);
18 | 
/*
 * Backward entry point of the bilinear sampler (declaration only; the
 * implementation is not in this header).
 *
 * Argument layout, as indicated by the inline comments:
 *   - goc/gow/goh/gob: sizes of gradOutput, taken from dims 3/2/1/0.
 *   - ic/ih/iw/ib: sizes of inputImages, taken from dims 3/1/2/0.
 *   - inputImages, grids, gradInputImages, gradGrids and gradOutput are
 *     each followed by four int stride-like values; the b/c/h/w suffixes
 *     presumably mean batch/channel/height/width strides -- TODO confirm
 *     against the .cu implementation.
 *   - stream: CUDA stream passed by the caller.
 * The meaning of the int return value is defined by the implementation.
 */
int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc,
                                                    /*gradOutput->size[2]*/int gow,
                                                    /*gradOutput->size[1]*/int goh,
                                                    /*gradOutput->size[0]*/int gob,
                                                    /*THCudaTensor_size(state, inputImages, 3)*/int ic,
                                                    /*THCudaTensor_size(state, inputImages, 1)*/int ih,
                                                    /*THCudaTensor_size(state, inputImages, 2)*/int iw,
                                                    /*THCudaTensor_size(state, inputImages, 0)*/int ib,
                                                    /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
                                                    /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
                                                    /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw,
                                                    /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
                                                    /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw,
                                                    /*THCState_getCurrentStream(state)*/cudaStream_t stream);
33 | 
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Factory method for easily getting imdbs by name."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | __sets = {}
14 | from datasets.pascal_voc import pascal_voc
15 | from datasets.coco import coco
16 | from datasets.imagenet import imagenet
17 | from datasets.vg import vg
18 | 
19 | import numpy as np
20 | 
# Every loop below registers a zero-argument factory in __sets under a
# dataset name. The loop variables are bound as lambda default arguments so
# each factory keeps the values of its own iteration (avoids late binding).

# Set up voc_<year>_<split>
for year in ['2007', '2012', '0712']:
  for split in ['train', 'val', 'trainval', 'test']:
    name = 'voc_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

# Set up coco_2014_<split>
for year in ['2014']:
  for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up the caption splits of coco 2014.
# NOTE: the keys use the same 'coco_<year>_<split>' pattern as the loop
# above (there is no 'cap' infix), so 'train', 'val' and 'trainval' simply
# re-register the same names; only 'capval' and 'valminuscapval' are new.
for year in ['2014']:
  for split in ['train', 'val', 'capval', 'valminuscapval', 'trainval']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up coco_2015_<split>
for year in ['2015']:
  for split in ['test', 'test-dev']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up coco_2017_<split>
for year in ['2017']:
  for split in ['train', 'val']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))

# Set up vg_<version>_<split>
# (earlier single-version variant, kept for reference)
# for version in ['1600-400-20']:
#     for split in ['minitrain', 'train', 'minival', 'val', 'test']:
#         name = 'vg_{}_{}'.format(version,split)
#         __sets[name] = (lambda split=split, version=version: vg(version, split))
for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']:
    for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']:
        name = 'vg_{}_{}'.format(version,split)
        __sets[name] = (lambda split=split, version=version: vg(version, split))
        
# Set up imagenet_<split>; the devkit and data paths are fixed relative paths.
for split in ['train', 'val', 'val1', 'val2', 'test']:
    name = 'imagenet_{}'.format(split)
    devkit_path = 'data/imagenet/ILSVRC/devkit'
    data_path = 'data/imagenet/ILSVRC'
    __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path))
67 | 
def get_imdb(name):
  """Build and return the imdb (image database) registered under `name`.

  Raises KeyError('Unknown dataset: <name>') when no dataset with that
  name has been registered.
  """
  if name in __sets:
    # Registered entries are zero-argument factories; call to instantiate.
    return __sets[name]()
  raise KeyError('Unknown dataset: {}'.format(name))
73 | 
74 | 
def list_imdbs():
  """Return the names of all registered imdbs as a new list."""
  # Iterating a dict yields its keys, in registration order.
  return list(__sets)
78 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 | 
17 |     // Number of ROIs
18 |     int num_rois = THCudaTensor_size(state, rois, 0);
19 |     int size_rois = THCudaTensor_size(state, rois, 1);
20 |     if (size_rois != 5)
21 |     {
22 |         return 0;
23 |     }
24 | 
25 |     // batch size
26 |     // int batch_size = THCudaTensor_size(state, features, 0);
27 |     // if (batch_size != 1)
28 |     // {
29 |     //     return 0;
30 |     // }
31 |     // data height
32 |     int data_height = THCudaTensor_size(state, features, 2);
33 |     // data width
34 |     int data_width = THCudaTensor_size(state, features, 3);
35 |     // Number of channels
36 |     int num_channels = THCudaTensor_size(state, features, 1);
37 | 
38 |     cudaStream_t stream = THCState_getCurrentStream(state);
39 | 
40 |     ROIPoolForwardLaucher(
41 |         data_flat, spatial_scale, num_rois, data_height,
42 |         data_width, num_channels, pooled_height,
43 |         pooled_width, rois_flat,
44 |         output_flat, argmax_flat, stream);
45 | 
46 |     return 1;
47 | }
48 | 
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     // Grab the input tensor
53 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 |     float * rois_flat = THCudaTensor_data(state, rois);
55 | 
56 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 | 
59 |     // Number of ROIs
60 |     int num_rois = THCudaTensor_size(state, rois, 0);
61 |     int size_rois = THCudaTensor_size(state, rois, 1);
62 |     if (size_rois != 5)
63 |     {
64 |         return 0;
65 |     }
66 | 
67 |     // batch size
68 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 |     // if (batch_size != 1)
70 |     // {
71 |     //     return 0;
72 |     // }
73 |     // data height
74 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 |     // data width
76 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 |     // Number of channels
78 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 | 
80 |     cudaStream_t stream = THCState_getCurrentStream(state);
81 |     ROIPoolBackwardLaucher(
82 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 |         data_width, num_channels, pooled_height,
84 |         pooled_width, rois_flat,
85 |         bottom_grad_flat, argmax_flat, stream);
86 | 
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick and Xinlei Chen
 6 | # --------------------------------------------------------
 7 | 
 8 | """Compute minibatch blobs for training a Fast R-CNN network."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
def get_minibatch(roidb, num_classes):
  """Given a roidb, construct a minibatch sampled from it.

  Args:
    roidb: list of roidb entries. Exactly one entry is supported (asserted
      below); it must provide 'gt_classes', 'gt_overlaps', 'boxes' and
      'img_id' in addition to what _get_image_blob reads.
    num_classes: not used by this function; kept for interface
      compatibility with callers.

  Returns:
    dict with the keys
      'data':     image blob returned by _get_image_blob,
      'gt_boxes': float32 array of shape (num_gt, 5) holding the scaled
                  box coordinates in columns 0-3 and the class in column 4,
      'im_info':  float32 array [[blob.shape[1], blob.shape[2], im_scale]],
      'img_id':   roidb[0]['img_id'].
  """
  num_images = len(roidb)
  # Sample random scales to use for each image in this batch
  random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
                  size=num_images)
  assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
    'num_images ({}) must divide BATCH_SIZE ({})'. \
    format(num_images, cfg.TRAIN.BATCH_SIZE)

  # Get the input image blob
  im_blob, im_scales = _get_image_blob(roidb, random_scale_inds)

  blobs = {'data': im_blob}

  assert len(im_scales) == 1, "Single batch only"
  assert len(roidb) == 1, "Single batch only"

  # gt boxes: (x1, y1, x2, y2, cls)
  is_foreground = roidb[0]['gt_classes'] != 0
  if cfg.TRAIN.USE_ALL_GT:
    # Include all ground truth boxes
    gt_inds = np.where(is_foreground)[0]
  else:
    # For the COCO ground truth boxes, exclude the ones that are ''iscrowd''
    # (rows whose overlaps are not all > -1.0).
    # The comparison must be parenthesised: `&` binds tighter than `!=`, so
    # the unparenthesised form `a != 0 & mask` evaluates `a != (0 & mask)`
    # and silently ignores the mask.
    not_crowd = np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1)
    gt_inds = np.where(is_foreground & not_crowd)[0]
  gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
  gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
  gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
  blobs['gt_boxes'] = gt_boxes
  blobs['im_info'] = np.array(
    [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
    dtype=np.float32)

  blobs['img_id'] = roidb[0]['img_id']

  return blobs
55 | 
def _get_image_blob(roidb, scale_inds):
  """Load every image referenced by `roidb`, preprocess it at the scale
  selected by `scale_inds`, and pack the results into one blob.

  Returns a tuple (blob, im_scales) where im_scales holds the scale factor
  reported by prep_im_for_blob for each image, in roidb order.
  """
  im_scales = []
  processed_ims = []
  for i, entry in enumerate(roidb):
    # Channel order is left exactly as imread returns it (no RGB<->BGR swap).
    im = imread(entry['image'])

    if im.ndim == 2:
      # Single-channel image: replicate it into three identical channels.
      im = np.concatenate([im[:, :, np.newaxis]] * 3, axis=2)

    if entry['flipped']:
      # Mirror along the width axis.
      im = im[:, ::-1, :]

    target_size = cfg.TRAIN.SCALES[scale_inds[i]]
    im, im_scale = prep_im_for_blob(
      im, cfg.PIXEL_MEANS, cfg.PIXEL_STDS, target_size, cfg.TRAIN.MAX_SIZE)
    im_scales.append(im_scale)
    processed_ims.append(im)

  # Create a blob to hold the input images
  return im_list_to_blob(processed_ims), im_scales
87 | 


--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Sergey Karayev
  6 | # --------------------------------------------------------
  7 | 
  8 | cimport cython
  9 | import numpy as np
 10 | cimport numpy as np
 11 | 
 12 | DTYPE = np.float
 13 | ctypedef np.float_t DTYPE_t
 14 | 
def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable wrapper that forwards to bbox_overlaps_c.

    Both arguments must be 2-D arrays of DTYPE_t; the result of
    bbox_overlaps_c is returned unchanged.
    """
    return bbox_overlaps_c(boxes, query_boxes)
 18 | 
 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
 20 |         np.ndarray[DTYPE_t, ndim=2] boxes,
 21 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
 22 |     """
 23 |     Parameters
 24 |     ----------
 25 |     boxes: (N, 4) ndarray of float
 26 |     query_boxes: (K, 4) ndarray of float
 27 |     Returns
 28 |     -------
 29 |     overlaps: (N, K) ndarray of overlap between boxes and query_boxes
 30 |     """
 31 |     cdef unsigned int N = boxes.shape[0]
 32 |     cdef unsigned int K = query_boxes.shape[0]
 33 |     cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
 34 |     cdef DTYPE_t iw, ih, box_area
 35 |     cdef DTYPE_t ua
 36 |     cdef unsigned int k, n
 37 |     for k in range(K):
 38 |         box_area = (
 39 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
 40 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
 41 |         )
 42 |         for n in range(N):
 43 |             iw = (
 44 |                 min(boxes[n, 2], query_boxes[k, 2]) -
 45 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
 46 |             )
 47 |             if iw > 0:
 48 |                 ih = (
 49 |                     min(boxes[n, 3], query_boxes[k, 3]) -
 50 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
 51 |                 )
 52 |                 if ih > 0:
 53 |                     ua = float(
 54 |                         (boxes[n, 2] - boxes[n, 0] + 1) *
 55 |                         (boxes[n, 3] - boxes[n, 1] + 1) +
 56 |                         box_area - iw * ih
 57 |                     )
 58 |                     overlaps[n, k] = iw * ih / ua
 59 |     return overlaps
 60 | 
 61 | 
def bbox_intersections(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """Python-callable wrapper that forwards to bbox_intersections_c.

    Both arguments must be 2-D arrays of DTYPE_t; the result of
    bbox_intersections_c is returned unchanged.
    """
    return bbox_intersections_c(boxes, query_boxes)
 66 | 
 67 | 
 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
 69 |         np.ndarray[DTYPE_t, ndim=2] boxes,
 70 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
 71 |     """
 72 |     For each query box compute the intersection ratio covered by boxes
 73 |     ----------
 74 |     Parameters
 75 |     ----------
 76 |     boxes: (N, 4) ndarray of float
 77 |     query_boxes: (K, 4) ndarray of float
 78 |     Returns
 79 |     -------
 80 |     overlaps: (N, K) ndarray of intersec between boxes and query_boxes
 81 |     """
 82 |     cdef unsigned int N = boxes.shape[0]
 83 |     cdef unsigned int K = query_boxes.shape[0]
 84 |     cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
 85 |     cdef DTYPE_t iw, ih, box_area
 86 |     cdef DTYPE_t ua
 87 |     cdef unsigned int k, n
 88 |     for k in range(K):
 89 |         box_area = (
 90 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
 91 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
 92 |         )
 93 |         for n in range(N):
 94 |             iw = (
 95 |                 min(boxes[n, 2], query_boxes[k, 2]) -
 96 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
 97 |             )
 98 |             if iw > 0:
 99 |                 ih = (
100 |                     min(boxes[n, 3], query_boxes[k, 3]) -
101 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
102 |                 )
103 |                 if ih > 0:
104 |                     intersec[n, k] = iw * ih / box_area
105 |     return intersec


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | 
  4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
  5 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
  6 | {
  7 |     // Grab the input tensor
  8 |     float * data_flat = THFloatTensor_data(features);
  9 |     float * rois_flat = THFloatTensor_data(rois);
 10 | 
 11 |     float * output_flat = THFloatTensor_data(output);
 12 | 
 13 |     // Number of ROIs
 14 |     int num_rois = THFloatTensor_size(rois, 0);
 15 |     int size_rois = THFloatTensor_size(rois, 1);
 16 |     // batch size
 17 |     int batch_size = THFloatTensor_size(features, 0);
 18 |     if(batch_size != 1)
 19 |     {
 20 |         return 0;
 21 |     }
 22 |     // data height
 23 |     int data_height = THFloatTensor_size(features, 1);
 24 |     // data width
 25 |     int data_width = THFloatTensor_size(features, 2);
 26 |     // Number of channels
 27 |     int num_channels = THFloatTensor_size(features, 3);
 28 | 
 29 |     // Set all element of the output tensor to -inf.
 30 |     THFloatStorage_fill(THFloatTensor_storage(output), -1);
 31 | 
 32 |     // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
 33 |     int index_roi = 0;
 34 |     int index_output = 0;
 35 |     int n;
 36 |     for (n = 0; n < num_rois; ++n)
 37 |     {
 38 |         int roi_batch_ind = rois_flat[index_roi + 0];
 39 |         int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
 40 |         int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
 41 |         int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
 42 |         int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
 43 |         //      CHECK_GE(roi_batch_ind, 0);
 44 |         //      CHECK_LT(roi_batch_ind, batch_size);
 45 | 
 46 |         int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
 47 |         int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
 48 |         float bin_size_h = (float)(roi_height) / (float)(pooled_height);
 49 |         float bin_size_w = (float)(roi_width) / (float)(pooled_width);
 50 | 
 51 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 52 |         const int output_area = pooled_width * pooled_height;
 53 | 
 54 |         int c, ph, pw;
 55 |         for (ph = 0; ph < pooled_height; ++ph)
 56 |         {
 57 |             for (pw = 0; pw < pooled_width; ++pw)
 58 |             {
 59 |                 int hstart = (floor((float)(ph) * bin_size_h));
 60 |                 int wstart = (floor((float)(pw) * bin_size_w));
 61 |                 int hend = (ceil((float)(ph + 1) * bin_size_h));
 62 |                 int wend = (ceil((float)(pw + 1) * bin_size_w));
 63 | 
 64 |                 hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
 65 |                 hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
 66 |                 wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
 67 |                 wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
 68 | 
 69 |                 const int pool_index = index_output + (ph * pooled_width + pw);
 70 |                 int is_empty = (hend <= hstart) || (wend <= wstart);
 71 |                 if (is_empty)
 72 |                 {
 73 |                     for (c = 0; c < num_channels * output_area; c += output_area)
 74 |                     {
 75 |                         output_flat[pool_index + c] = 0;
 76 |                     }
 77 |                 }
 78 |                 else
 79 |                 {
 80 |                     int h, w, c;
 81 |                     for (h = hstart; h < hend; ++h)
 82 |                     {
 83 |                         for (w = wstart; w < wend; ++w)
 84 |                         {
 85 |                             for (c = 0; c < num_channels; ++c)
 86 |                             {
 87 |                                 const int index = (h * data_width + w) * num_channels + c;
 88 |                                 if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
 89 |                                 {
 90 |                                     output_flat[pool_index + c * output_area] = data_flat[index_data + index];
 91 |                                 }
 92 |                             }
 93 |                         }
 94 |                     }
 95 |                 }
 96 |             }
 97 |         }
 98 | 
 99 |         // Increment ROI index
100 |         index_roi += size_rois;
101 |         index_output += pooled_height * pooled_width * num_channels;
102 |     }
103 |     return 1;
104 | }


--------------------------------------------------------------------------------
/lib/datasets/vg_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | 
  7 | import xml.etree.ElementTree as ET
  8 | import os
  9 | import numpy as np
 10 | from datasets.voc_eval import voc_ap
 11 | 
 12 | 
 13 | 
 14 | def vg_eval( detpath,
 15 |              gt_roidb,
 16 |              image_index,
 17 |              classindex,
 18 |              ovthresh=0.5,
 19 |              use_07_metric=False,
 20 |              eval_attributes=False):
 21 |     """rec, prec, ap, sorted_scores, npos = voc_eval(
 22 |                                 detpath, 
 23 |                                 gt_roidb,
 24 |                                 image_index,
 25 |                                 classindex,
 26 |                                 [ovthresh],
 27 |                                 [use_07_metric])
 28 | 
 29 |     Top level function that does the Visual Genome evaluation.
 30 | 
 31 |     detpath: Path to detections
 32 |     gt_roidb: List of ground truth structs.
 33 |     image_index: List of image ids.
 34 |     classindex: Category index
 35 |     [ovthresh]: Overlap threshold (default = 0.5)
 36 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 37 |         (default False)
 38 |     """
 39 |     # extract gt objects for this class
 40 |     class_recs = {}
 41 |     npos = 0
 42 |     for item,imagename in zip(gt_roidb,image_index):
 43 |         if eval_attributes:
 44 |             bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :]
 45 |         else:
 46 |             bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :]
 47 |         difficult = np.zeros((bbox.shape[0],)).astype(np.bool)
 48 |         det = [False] * bbox.shape[0]
 49 |         npos = npos + sum(~difficult)        
 50 |         class_recs[str(imagename)] = {'bbox': bbox,
 51 |                                  'difficult': difficult,
 52 |                                  'det': det}
 53 |     if npos == 0:
 54 |         # No ground truth examples
 55 |         return 0,0,0,0,npos
 56 | 
 57 |     # read dets
 58 |     with open(detpath, 'r') as f:
 59 |         lines = f.readlines()
 60 |     if len(lines) == 0:
 61 |         # No detection examples
 62 |         return 0,0,0,0,npos
 63 | 
 64 |     splitlines = [x.strip().split(' ') for x in lines]
 65 |     image_ids = [x[0] for x in splitlines]
 66 |     confidence = np.array([float(x[1]) for x in splitlines])
 67 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
 68 | 
 69 |     # sort by confidence
 70 |     sorted_ind = np.argsort(-confidence)
 71 |     sorted_scores = -np.sort(-confidence)
 72 |     BB = BB[sorted_ind, :]
 73 |     image_ids = [image_ids[x] for x in sorted_ind]
 74 | 
 75 |     # go down dets and mark TPs and FPs
 76 |     nd = len(image_ids)
 77 |     tp = np.zeros(nd)
 78 |     fp = np.zeros(nd)
 79 |     for d in range(nd):
 80 |         R = class_recs[image_ids[d]]
 81 |         bb = BB[d, :].astype(float)
 82 |         ovmax = -np.inf
 83 |         BBGT = R['bbox'].astype(float)
 84 | 
 85 |         if BBGT.size > 0:
 86 |             # compute overlaps
 87 |             # intersection
 88 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
 89 |             iymin = np.maximum(BBGT[:, 1], bb[1])
 90 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
 91 |             iymax = np.minimum(BBGT[:, 3], bb[3])
 92 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
 93 |             ih = np.maximum(iymax - iymin + 1., 0.)
 94 |             inters = iw * ih
 95 | 
 96 |             # union
 97 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
 98 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
 99 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
100 | 
101 |             overlaps = inters / uni
102 |             ovmax = np.max(overlaps)
103 |             jmax = np.argmax(overlaps)
104 | 
105 |         if ovmax > ovthresh:
106 |             if not R['difficult'][jmax]:
107 |                 if not R['det'][jmax]:
108 |                     tp[d] = 1.
109 |                     R['det'][jmax] = 1
110 |                 else:
111 |                     fp[d] = 1.
112 |         else:
113 |             fp[d] = 1.
114 | 
115 |     # compute precision recall
116 |     fp = np.cumsum(fp)
117 |     tp = np.cumsum(tp)
118 |     rec = tp / float(npos)
119 |     # avoid divide by zero in case the first detection matches a difficult
120 |     # ground truth
121 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
122 |     ap = voc_ap(rec, prec, use_07_metric)
123 |     
124 |     return rec, prec, ap, sorted_scores, npos
125 | 


--------------------------------------------------------------------------------
/lib/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | import pycocotools._mask as _mask
  4 | 
  5 | # Interface for manipulating masks stored in RLE format.
  6 | #
  7 | # RLE is a simple yet efficient format for storing binary masks. RLE
  8 | # first divides a vector (or vectorized image) into a series of piecewise
  9 | # constant regions and then for each piece simply stores the length of
 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 12 | # (note that the odd counts are always the numbers of zeros). Instead of
 13 | # storing the counts directly, additional compression is achieved with a
 14 | # variable bitrate representation based on a common scheme called LEB128.
 15 | #
 16 | # Compression is greatest given large piecewise constant regions.
 17 | # Specifically, the size of the RLE is proportional to the number of
 18 | # *boundaries* in M (or for an image the number of boundaries in the y
 19 | # direction). Assuming fairly simple shapes, the RLE representation is
 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 21 | # is substantially lower, especially for large simple objects (large n).
 22 | #
 23 | # Many common operations on masks can be computed directly using the RLE
 24 | # (without need for decoding). This includes computations such as area,
 25 | # union, intersection, etc. All of these operations are linear in the
 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 27 | # of the object. Computing these operations on the original mask is O(n).
 28 | # Thus, using the RLE can result in substantial computational savings.
 29 | #
 30 | # The following API functions are defined:
 31 | #  encode         - Encode binary masks using RLE.
 32 | #  decode         - Decode binary masks encoded via RLE.
 33 | #  merge          - Compute union or intersection of encoded masks.
 34 | #  iou            - Compute intersection over union between masks.
 35 | #  area           - Compute area of encoded masks.
 36 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 37 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 38 | #
 39 | # Usage:
 40 | #  Rs     = encode( masks )
 41 | #  masks  = decode( Rs )
 42 | #  R      = merge( Rs, intersect=false )
 43 | #  o      = iou( dt, gt, iscrowd )
 44 | #  a      = area( Rs )
 45 | #  bbs    = toBbox( Rs )
 46 | #  Rs     = frPyObjects( [pyObjects], h, w )
 47 | #
 48 | # In the API the following formats are used:
 49 | #  Rs      - [dict] Run-length encoding of binary masks
 50 | #  R       - dict Run-length encoding of binary mask
 51 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 52 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 53 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 54 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 55 | #  dt,gt   - May be either bounding boxes or encoded masks
 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 57 | #
 58 | # Finally, a note about the intersection over union (iou) computation.
 59 | # The standard iou of a ground truth (gt) and detected (dt) object is
 60 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 61 | # For "crowd" regions, we use a modified criteria. If a gt object is
 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 65 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 66 | # For crowd gt regions we use this modified criteria above for the iou.
 67 | #
 68 | # To compile run "python setup.py build_ext --inplace"
 69 | # Please do not contact us for help with compiling.
 70 | #
 71 | # Microsoft COCO Toolbox.      version 2.0
 72 | # Data, paper, and tutorials available at:  http://mscoco.org/
 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 74 | # Licensed under the Simplified BSD License [see coco/license.txt]
 75 | 
# These three operations are re-exported unchanged from the _mask module.
iou         = _mask.iou
merge       = _mask.merge
frPyObjects = _mask.frPyObjects
 79 | 
 80 | def encode(bimask):
 81 |     if len(bimask.shape) == 3:
 82 |         return _mask.encode(bimask)
 83 |     elif len(bimask.shape) == 2:
 84 |         h, w = bimask.shape
 85 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 86 | 
 87 | def decode(rleObjs):
 88 |     if type(rleObjs) == list:
 89 |         return _mask.decode(rleObjs)
 90 |     else:
 91 |         return _mask.decode([rleObjs])[:,:,0]
 92 | 
 93 | def area(rleObjs):
 94 |     if type(rleObjs) == list:
 95 |         return _mask.area(rleObjs)
 96 |     else:
 97 |         return _mask.area([rleObjs])[0]
 98 | 
 99 | def toBbox(rleObjs):
100 |     if type(rleObjs) == list:
101 |         return _mask.toBbox(rleObjs)
102 |     else:
103 |         return _mask.toBbox([rleObjs])[0]


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  2 | from __future__ import absolute_import
  3 | from __future__ import division
  4 | from __future__ import print_function
  5 | 
  6 | import datasets
  7 | import numpy as np
  8 | from model.utils.config import cfg
  9 | from datasets.factory import get_imdb
 10 | import PIL
 11 | import pdb
 12 | 
 13 | 
 14 | def prepare_roidb(imdb):
 15 |     """Enrich the imdb's roidb by adding some derived quantities that
 16 |     are useful for training. This function precomputes the maximum
 17 |     overlap, taken over ground-truth boxes, between each ROI and
 18 |     each ground-truth box. The class with maximum overlap is also
 19 |     recorded.
 20 |     """
 21 | 
 22 |     roidb = imdb.roidb
 23 |     if not (imdb.name.startswith('coco')):
 24 |         sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 25 |                  for i in range(imdb.num_images)]
 26 | 
 27 |     for i in range(len(imdb.image_index)):
 28 |         roidb[i]['img_id'] = imdb.image_id_at(i)
 29 |         roidb[i]['image'] = imdb.image_path_at(i)
 30 |         if not (imdb.name.startswith('coco')):
 31 |             roidb[i]['width'] = sizes[i][0]
 32 |             roidb[i]['height'] = sizes[i][1]
 33 |         # need gt_overlaps as a dense array for argmax
 34 |         gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 35 |         # max overlap with gt over classes (columns)
 36 |         max_overlaps = gt_overlaps.max(axis=1)
 37 |         # gt class that had the max overlap
 38 |         max_classes = gt_overlaps.argmax(axis=1)
 39 |         roidb[i]['max_classes'] = max_classes
 40 |         roidb[i]['max_overlaps'] = max_overlaps
 41 |         # sanity checks
 42 |         # max overlap of 0 => class should be zero (background)
 43 |         zero_inds = np.where(max_overlaps == 0)[0]
 44 |         assert all(max_classes[zero_inds] == 0)
 45 |         # max overlap > 0 => class should not be zero (must be a fg class)
 46 |         nonzero_inds = np.where(max_overlaps > 0)[0]
 47 |         assert all(max_classes[nonzero_inds] != 0)
 48 | 
 49 | 
 50 | def rank_roidb_ratio(roidb):
 51 |     # rank roidb based on the ratio between width and height.
 52 |     ratio_large = 2  # largest ratio to preserve.
 53 |     ratio_small = 0.5  # smallest ratio to preserve.
 54 | 
 55 |     ratio_list = []
 56 |     for i in range(len(roidb)):
 57 |         width = roidb[i]['width']
 58 |         height = roidb[i]['height']
 59 |         ratio = width / float(height)
 60 | 
 61 |         if cfg.TRAIN.ASPECT_CROPPING:
 62 |             if ratio > ratio_large:
 63 |                 roidb[i]['need_crop'] = 1
 64 |                 ratio = ratio_large
 65 |             elif ratio < ratio_small:
 66 |                 roidb[i]['need_crop'] = 1
 67 |                 ratio = ratio_small
 68 |             else:
 69 |                 roidb[i]['need_crop'] = 0
 70 |         else:
 71 |             roidb[i]['need_crop'] = 0
 72 | 
 73 |         ratio_list.append(ratio)
 74 | 
 75 |     ratio_list = np.array(ratio_list)
 76 |     ratio_index = np.argsort(ratio_list)
 77 |     return ratio_list[ratio_index], ratio_index
 78 | 
 79 | 
 80 | def filter_roidb(roidb):
 81 |     # filter the image without bounding box.
 82 |     print('before filtering, there are %d images...' % (len(roidb)))
 83 |     i = 0
 84 |     while i < len(roidb):
 85 |         if len(roidb[i]['boxes']) == 0:
 86 |             del roidb[i]
 87 |             i -= 1
 88 |         i += 1
 89 | 
 90 |     print('after filtering, there are %d images...' % (len(roidb)))
 91 |     return roidb
 92 | 
 93 | 
 94 | def combined_roidb(imdb_names, training=True):
 95 |     """
 96 |     Combine multiple roidbs
 97 |     """
 98 | 
 99 |     def get_training_roidb(imdb):
100 |         """Returns a roidb (Region of Interest database) for use in training."""
101 |         if cfg.TRAIN.USE_FLIPPED:
102 |             print('Appending horizontally-flipped training examples...')
103 |             imdb.append_flipped_images()
104 |             print('done')
105 | 
106 |         print('Preparing training data...')
107 | 
108 |         prepare_roidb(imdb)
109 |         # ratio_index = rank_roidb_ratio(imdb)
110 |         print('done')
111 | 
112 |         return imdb.roidb
113 | 
114 |     def get_roidb(imdb_name):
115 |         imdb = get_imdb(imdb_name)
116 |         print('Loaded dataset `{:s}` for training'.format(imdb.name))
117 |         imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
118 |         print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
119 |         roidb = get_training_roidb(imdb)
120 |         return roidb
121 | 
122 |     roidbs = [get_roidb(s) for s in imdb_names.split('+')]
123 |     roidb = roidbs[0]
124 | 
125 |     if len(roidbs) > 1:
126 |         for r in roidbs[1:]:
127 |             roidb.extend(r)
128 |         tmp = get_imdb(imdb_names.split('+')[1])
129 |         imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
130 |     else:
131 |         imdb = get_imdb(imdb_names)
132 | 
133 |     if training:
134 |         roidb = filter_roidb(roidb)
135 | 
136 |     ratio_list, ratio_index = rank_roidb_ratio(roidb)
137 | 
138 |     return imdb, roidb, ratio_list, ratio_index
139 | 


--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # --------------------------------------------------------
  3 | # Fast R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick
  7 | # --------------------------------------------------------
  8 | 
  9 | import os
 10 | from os.path import join as pjoin
 11 | import numpy as np
 12 | from distutils.core import setup
 13 | from distutils.extension import Extension
 14 | from Cython.Distutils import build_ext
 15 | 
 16 | 
 17 | def find_in_path(name, path):
 18 |     "Find a file in a search path"
 19 |     # adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
 20 |     for dir in path.split(os.pathsep):
 21 |         binpath = pjoin(dir, name)
 22 |         if os.path.exists(binpath):
 23 |             return os.path.abspath(binpath)
 24 |     return None
 25 | 
 26 | 
 27 | # def locate_cuda():
 28 | #     """Locate the CUDA environment on the system
 29 | #
 30 | #     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
 31 | #     and values giving the absolute path to each directory.
 32 | #
 33 | #     Starts by looking for the CUDAHOME env variable. If not found, everything
 34 | #     is based on finding 'nvcc' in the PATH.
 35 | #     """
 36 | #
 37 | #     # first check if the CUDAHOME env variable is in use
 38 | #     if 'CUDAHOME' in os.environ:
 39 | #         home = os.environ['CUDAHOME']
 40 | #         nvcc = pjoin(home, 'bin', 'nvcc')
 41 | #     else:
 42 | #         # otherwise, search the PATH for NVCC
 43 | #         default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
 44 | #         nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
 45 | #         if nvcc is None:
 46 | #             raise EnvironmentError('The nvcc binary could not be '
 47 | #                                    'located in your $PATH. Either add it to your path, or set $CUDAHOME')
 48 | #         home = os.path.dirname(os.path.dirname(nvcc))
 49 | #
 50 | #     cudaconfig = {'home': home, 'nvcc': nvcc,
 51 | #                   'include': pjoin(home, 'include'),
 52 | #                   'lib64': pjoin(home, 'lib64')}
 53 | #     for k, v in cudaconfig.iteritems():
 54 | #         if not os.path.exists(v):
 55 | #             raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
 56 | #
 57 | #     return cudaconfig
 58 | 
 59 | 
 60 | # CUDA = locate_cuda()
 61 | 
# Obtain the numpy include directory.  This logic works across numpy versions.
try:
    numpy_include = np.get_include()
except AttributeError:
    # np.get_include is not available; use the alternative accessor.
    numpy_include = np.get_numpy_include()
 67 | 
 68 | 
 69 | def customize_compiler_for_nvcc(self):
 70 |     """inject deep into distutils to customize how the dispatch
 71 |     to gcc/nvcc works.
 72 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
 73 |     injected in, and still have the right customizations (i.e.
 74 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
 75 |     the OO route, I have this. Note, it's kindof like a wierd functional
 76 |     subclassing going on."""
 77 | 
 78 |     # tell the compiler it can processes .cu
 79 |     self.src_extensions.append('.cu')
 80 | 
 81 |     # save references to the default compiler_so and _comple methods
 82 |     default_compiler_so = self.compiler_so
 83 |     super = self._compile
 84 | 
 85 |     # now redefine the _compile method. This gets executed for each
 86 |     # object but distutils doesn't have the ability to change compilers
 87 |     # based on source extension: we add it.
 88 |     def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
 89 |         print(extra_postargs)
 90 |         if os.path.splitext(src)[1] == '.cu':
 91 |             # use the cuda for .cu files
 92 |             self.set_executable('compiler_so', CUDA['nvcc'])
 93 |             # use only a subset of the extra_postargs, which are 1-1 translated
 94 |             # from the extra_compile_args in the Extension class
 95 |             postargs = extra_postargs['nvcc']
 96 |         else:
 97 |             postargs = extra_postargs['gcc']
 98 | 
 99 |         super(obj, src, ext, cc_args, postargs, pp_opts)
100 |         # reset the default compiler_so, which we might have changed for cuda
101 |         self.compiler_so = default_compiler_so
102 | 
103 |     # inject our redefined _compile method into the class
104 |     self._compile = _compile
105 | 
106 | 
107 | # run the customize_compiler
class custom_build_ext(build_ext):
    """build_ext command that patches the compiler before building.

    customize_compiler_for_nvcc is applied to self.compiler, after which the
    regular build_ext.build_extensions runs.
    """
    def build_extensions(self):
        customize_compiler_for_nvcc(self.compiler)
        build_ext.build_extensions(self)
112 | 
113 | 
# Extensions to build. extra_compile_args is a dict keyed by compiler name
# rather than a flat list: the _compile wrapper installed by
# customize_compiler_for_nvcc looks up the 'gcc' entry for every source that
# is not a .cu file.
ext_modules = [
    Extension(
        "model.utils.cython_bbox",
        ["model/utils/bbox.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    Extension(
        "model.nms.cpu_nms",
        ["model/nms/cpu_nms.pyx"],
        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
        include_dirs=[numpy_include]
    ),
    Extension(
        'pycocotools._mask',
        sources=['pycocotools/maskApi.c', 'pycocotools/_mask.pyx'],
        include_dirs=[numpy_include, 'pycocotools'],
        extra_compile_args={
            'gcc': ['-Wno-cpp', '-Wno-unused-function', '-std=c99']},
    ),
]

setup(
    name='faster_rcnn',
    ext_modules=ext_modules,
    # inject our custom trigger
    cmdclass={'build_ext': custom_build_ext},
)


--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
// Evaluate a CUDA runtime call and, if it did not return cudaSuccess, print
// the corresponding error string to stdout. Execution continues afterwards.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Quotient m / n plus one when there is a positive remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long: one thread per bit of a mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
// Make device_id the current CUDA device. cudaSetDevice is skipped when
// cudaGetDevice reports that device_id is already current.
void _set_device(int device_id) {
  int current_device;
  CUDA_CHECK(cudaGetDevice(&current_device));
  if (current_device == device_id) {
    return;
  }
  // NOTE(review): the remark previously here said cudaSetDevice must come
  // before any calls to "Get", which may perform initialization using the
  // GPU. cudaGetDevice has already been called above, so "Get" presumably
  // refers to something outside this file - confirm.
  CUDA_CHECK(cudaSetDevice(device_id));
}
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/model/rpn/rpn_fpn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | 
  6 | from model.utils.config import cfg
  7 | from .proposal_layer_fpn import _ProposalLayer_FPN
  8 | from .anchor_target_layer_fpn import _AnchorTargetLayer_FPN
  9 | from model.utils.net_utils import _smooth_l1_loss
 10 | 
 11 | import numpy as np
 12 | import math
 13 | import pdb
 14 | import time
 15 | 
 16 | class _RPN_FPN(nn.Module):
 17 |     """ region proposal network """
 18 |     def __init__(self, din):
 19 |         super(_RPN_FPN, self).__init__()
 20 | 
 21 |         self.din = din  # get depth of input feature map, e.g., 512
 22 |         self.anchor_ratios = cfg.ANCHOR_RATIOS
 23 |         self.anchor_scales = cfg.ANCHOR_SCALES
 24 |         self.feat_stride = cfg.FEAT_STRIDE[0]
 25 | 
 26 |         # define the convrelu layers processing input feature map
 27 |         self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
 28 | 
 29 |         # define bg/fg classifcation score layer
 30 |         # self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
 31 |         self.nc_score_out = 1 * len(self.anchor_ratios) * 2 # 2(bg/fg) * 3 (anchor ratios) * 1 (anchor scale)
 32 |         self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
 33 | 
 34 |         # define anchor box offset prediction layer
 35 |         # self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
 36 |         self.nc_bbox_out = 1 * len(self.anchor_ratios) * 4 # 4(coords) * 3 (anchors) * 1 (anchor scale)
 37 |         self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
 38 | 
 39 |         # define proposal layer
 40 |         self.RPN_proposal = _ProposalLayer_FPN(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 41 | 
 42 |         # define anchor target layer
 43 |         self.RPN_anchor_target = _AnchorTargetLayer_FPN(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 44 | 
 45 |         self.rpn_loss_cls = 0
 46 |         self.rpn_loss_box = 0
 47 | 
 48 |     @staticmethod
 49 |     def reshape(x, d):
 50 |         input_shape = x.size()
 51 |         x = x.view(
 52 |             input_shape[0],
 53 |             int(d),
 54 |             int(float(input_shape[1] * input_shape[2]) / float(d)),
 55 |             input_shape[3]
 56 |         )
 57 |         return x
 58 | 
 59 |     def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):        
 60 | 
 61 | 
 62 |         n_feat_maps = len(rpn_feature_maps)
 63 | 
 64 |         rpn_cls_scores = []
 65 |         rpn_cls_probs = []
 66 |         rpn_bbox_preds = []
 67 |         rpn_shapes = []
 68 | 
 69 |         for i in range(n_feat_maps):
 70 |             feat_map = rpn_feature_maps[i]
 71 |             batch_size = feat_map.size(0)
 72 |             
 73 |             # return feature map after convrelu layer
 74 |             rpn_conv1 = F.relu(self.RPN_Conv(feat_map), inplace=True)
 75 |             # get rpn classification score
 76 |             rpn_cls_score = self.RPN_cls_score(rpn_conv1)
 77 | 
 78 |             rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
 79 |             rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape)
 80 |             rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
 81 | 
 82 |             # get rpn offsets to the anchor boxes
 83 |             rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
 84 | 
 85 |             rpn_shapes.append([rpn_cls_score.size()[2], rpn_cls_score.size()[3]])
 86 |             rpn_cls_scores.append(rpn_cls_score.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
 87 |             rpn_cls_probs.append(rpn_cls_prob.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2))
 88 |             rpn_bbox_preds.append(rpn_bbox_pred.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4))
 89 | 
 90 |         rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
 91 |         rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
 92 |         rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)
 93 | 
 94 |         n_rpn_pred = rpn_cls_score_alls.size(1)
 95 | 
 96 |         # proposal layer
 97 |         cfg_key = 'TRAIN' if self.training else 'TEST'
 98 | 
 99 |         rois = self.RPN_proposal((rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data,
100 |                                  im_info, cfg_key, rpn_shapes))
101 | 
102 |         self.rpn_loss_cls = 0
103 |         self.rpn_loss_box = 0
104 | 
105 |         # generating training labels and build the rpn loss
106 |         if self.training:
107 |             assert gt_boxes is not None
108 | 
109 |             rpn_data = self.RPN_anchor_target((rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes, rpn_shapes))
110 | 
111 |             # compute classification loss
112 |             rpn_label = rpn_data[0].view(batch_size, -1)
113 |             rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
114 |             rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1,2), 0, rpn_keep)
115 |             rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
116 |             rpn_label = Variable(rpn_label.long())
117 |             self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
118 |             fg_cnt = torch.sum(rpn_label.data.ne(0))
119 | 
120 |             rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
121 | 
122 |             # compute bbox regression loss
123 |             rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2) \
124 |                     .expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
125 |             rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2) \
126 |                     .expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
127 |             rpn_bbox_targets = Variable(rpn_bbox_targets)
128 |             
129 |             self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls, rpn_bbox_targets, rpn_bbox_inside_weights, 
130 |                             rpn_bbox_outside_weights, sigma=3)
131 | 
132 |         return rois, self.rpn_loss_cls, self.rpn_loss_box
133 | 


--------------------------------------------------------------------------------
/lib/model/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
  1 | # ----------------------------------------------------------
  2 | # Soft-NMS: Improving Object Detection With One Line of Code
  3 | # Copyright (c) University of Maryland, College Park
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Navaneeth Bodla and Bharat Singh
  6 | # ----------------------------------------------------------
  7 | 
  8 | import numpy as np
  9 | cimport numpy as np
 10 | 
 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
 12 |     return a if a >= b else b
 13 | 
 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
 15 |     return a if a <= b else b
 16 | 
 17 | def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
 18 |     cdef unsigned int N = boxes.shape[0]
 19 |     cdef float iw, ih, box_area
 20 |     cdef float ua
 21 |     cdef int pos = 0
 22 |     cdef float maxscore = 0
 23 |     cdef int maxpos = 0
 24 |     cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
 25 | 
 26 |     for i in range(N):
 27 |         maxscore = boxes[i, 4]
 28 |         maxpos = i
 29 | 
 30 |         tx1 = boxes[i,0]
 31 |         ty1 = boxes[i,1]
 32 |         tx2 = boxes[i,2]
 33 |         ty2 = boxes[i,3]
 34 |         ts = boxes[i,4]
 35 | 
 36 |         pos = i + 1
 37 | 	# get max box
 38 |         while pos < N:
 39 |             if maxscore < boxes[pos, 4]:
 40 |                 maxscore = boxes[pos, 4]
 41 |                 maxpos = pos
 42 |             pos = pos + 1
 43 | 
 44 | 	# add max box as a detection
 45 |         boxes[i,0] = boxes[maxpos,0]
 46 |         boxes[i,1] = boxes[maxpos,1]
 47 |         boxes[i,2] = boxes[maxpos,2]
 48 |         boxes[i,3] = boxes[maxpos,3]
 49 |         boxes[i,4] = boxes[maxpos,4]
 50 | 
 51 | 	# swap ith box with position of max box
 52 |         boxes[maxpos,0] = tx1
 53 |         boxes[maxpos,1] = ty1
 54 |         boxes[maxpos,2] = tx2
 55 |         boxes[maxpos,3] = ty2
 56 |         boxes[maxpos,4] = ts
 57 | 
 58 |         tx1 = boxes[i,0]
 59 |         ty1 = boxes[i,1]
 60 |         tx2 = boxes[i,2]
 61 |         ty2 = boxes[i,3]
 62 |         ts = boxes[i,4]
 63 | 
 64 |         pos = i + 1
 65 | 	# NMS iterations, note that N changes if detection boxes fall below threshold
 66 |         while pos < N:
 67 |             x1 = boxes[pos, 0]
 68 |             y1 = boxes[pos, 1]
 69 |             x2 = boxes[pos, 2]
 70 |             y2 = boxes[pos, 3]
 71 |             s = boxes[pos, 4]
 72 | 
 73 |             area = (x2 - x1 + 1) * (y2 - y1 + 1)
 74 |             iw = (min(tx2, x2) - max(tx1, x1) + 1)
 75 |             if iw > 0:
 76 |                 ih = (min(ty2, y2) - max(ty1, y1) + 1)
 77 |                 if ih > 0:
 78 |                     ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
 79 |                     ov = iw * ih / ua #iou between max box and detection box
 80 | 
 81 |                     if method == 1: # linear
 82 |                         if ov > Nt:
 83 |                             weight = 1 - ov
 84 |                         else:
 85 |                             weight = 1
 86 |                     elif method == 2: # gaussian
 87 |                         weight = np.exp(-(ov * ov)/sigma)
 88 |                     else: # original NMS
 89 |                         if ov > Nt:
 90 |                             weight = 0
 91 |                         else:
 92 |                             weight = 1
 93 | 
 94 |                     boxes[pos, 4] = weight*boxes[pos, 4]
 95 | 
 96 | 		    # if box score falls below threshold, discard the box by swapping with last box
 97 | 		    # update N
 98 |                     if boxes[pos, 4] < threshold:
 99 |                         boxes[pos,0] = boxes[N-1, 0]
100 |                         boxes[pos,1] = boxes[N-1, 1]
101 |                         boxes[pos,2] = boxes[N-1, 2]
102 |                         boxes[pos,3] = boxes[N-1, 3]
103 |                         boxes[pos,4] = boxes[N-1, 4]
104 |                         N = N - 1
105 |                         pos = pos - 1
106 | 
107 |             pos = pos + 1
108 | 
109 |     keep = [i for i in range(N)]
110 |     return keep
111 | 
112 | 
def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
    """Greedy non-maximum suppression.

    Parameters:
        dets:   float32 array whose rows are read as ``[x1, y1, x2, y2, score]``.
        thresh: a lower-scoring box is suppressed when its IoU with an already
                kept box is >= ``thresh``.

    Returns:
        List of row indices into ``dets`` that are kept, ordered by descending
        score.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # argsort() yields an intp index array, so the buffer is typed intp_t;
    # a C-long buffer (np.int_t) does not match on LLP64 platforms.
    cdef np.ndarray[np.intp_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int was removed from NumPy (1.24); np.intp is the matching dtype for
    # the intp_t buffer declared here.
    cdef np.ndarray[np.intp_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.intp)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    keep = []
    for _i in range(ndets):
        i = order[_i]
        if suppressed[i] == 1:
            continue
        keep.append(i)
        ix1 = x1[i]
        iy1 = y1[i]
        ix2 = x2[i]
        iy2 = y2[i]
        iarea = areas[i]
        # only boxes ranked after box i can still be suppressed by it
        for _j in range(_i + 1, ndets):
            j = order[_j]
            if suppressed[j] == 1:
                continue
            xx1 = max(ix1, x1[j])
            yy1 = max(iy1, y1[j])
            xx2 = min(ix2, x2[j])
            yy2 = min(iy2, y2[j])
            w = max(0.0, xx2 - xx1 + 1)
            h = max(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (iarea + areas[j] - inter)
            if ovr >= thresh:
                suppressed[j] = 1

    return keep


--------------------------------------------------------------------------------
/lib/model/rpn/proposal_layer_fpn.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | # --------------------------------------------------------
  8 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
  9 | # --------------------------------------------------------
 10 | 
 11 | import torch
 12 | import torch.nn as nn
 13 | import numpy as np
 14 | import math
 15 | import yaml
 16 | from model.utils.config import cfg
 17 | from .generate_anchors import generate_anchors, generate_anchors_all_pyramids
 18 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
 19 | from model.nms.nms_wrapper import nms
 20 | 
 21 | import pdb
 22 | 
 23 | DEBUG = False
 24 | 
 25 | class _ProposalLayer_FPN(nn.Module):
 26 |     """
 27 |     Outputs object detection proposals by applying estimated bounding-box
 28 |     transformations to a set of regular boxes (called "anchors").
 29 |     """
 30 | 
 31 |     def __init__(self, feat_stride, scales, ratios):
 32 |         super(_ProposalLayer_FPN, self).__init__()
 33 |         self._anchor_ratios = ratios
 34 |         self._feat_stride = feat_stride
 35 |         self._fpn_scales = np.array(cfg.FPN_ANCHOR_SCALES)
 36 |         self._fpn_feature_strides = np.array(cfg.FPN_FEAT_STRIDES)
 37 |         self._fpn_anchor_stride = cfg.FPN_ANCHOR_STRIDE
 38 |         # self._anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, ratios, self._fpn_feature_strides, fpn_anchor_stride))
 39 |         # self._num_anchors = self._anchors.size(0)
 40 | 
 41 |     def forward(self, input):
 42 | 
 43 |         # Algorithm:
 44 |         #
 45 |         # for each (H, W) location i
 46 |         #   generate A anchor boxes centered on cell i
 47 |         #   apply predicted bbox deltas at cell i to each of the A anchors
 48 |         # clip predicted boxes to image
 49 |         # remove predicted boxes with either height or width < threshold
 50 |         # sort all (proposal, score) pairs by score from highest to lowest
 51 |         # take top pre_nms_topN proposals before NMS
 52 |         # apply NMS with threshold 0.7 to remaining proposals
 53 |         # take after_nms_topN proposals after NMS
 54 |         # return the top proposals (-> RoIs top, scores top)
 55 | 
 56 | 
 57 |         # the first set of _num_anchors channels are bg probs
 58 |         # the second set are the fg probs
 59 |         scores = input[0][:, :, 1]  # batch_size x num_rois x 1
 60 |         bbox_deltas = input[1]      # batch_size x num_rois x 4
 61 |         im_info = input[2]
 62 |         cfg_key = input[3]
 63 |         feat_shapes = input[4]        
 64 | 
 65 |         pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
 66 |         post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
 67 |         nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
 68 |         min_size      = cfg[cfg_key].RPN_MIN_SIZE
 69 | 
 70 |         batch_size = bbox_deltas.size(0)
 71 | 
 72 |         anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios, 
 73 |                 feat_shapes, self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
 74 |         num_anchors = anchors.size(0)
 75 | 
 76 |         anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)
 77 | 
 78 |         # Convert anchors into proposals via bbox transformations
 79 |         proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
 80 | 
 81 |         # 2. clip predicted boxes to image
 82 |         proposals = clip_boxes(proposals, im_info, batch_size)
 83 |         # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()
 84 |                 
 85 |         scores_keep = scores
 86 |         proposals_keep = proposals
 87 | 
 88 |         _, order = torch.sort(scores_keep, 1, True)
 89 | 
 90 |         output = scores.new(batch_size, post_nms_topN, 5).zero_()
 91 |         for i in range(batch_size):
 92 |             # # 3. remove predicted boxes with either height or width < threshold
 93 |             # # (NOTE: convert min_size to input image scale stored in im_info[2])
 94 |             proposals_single = proposals_keep[i]
 95 |             scores_single = scores_keep[i]
 96 | 
 97 |             # # 4. sort all (proposal, score) pairs by score from highest to lowest
 98 |             # # 5. take top pre_nms_topN (e.g. 6000)
 99 |             order_single = order[i]
100 | 
101 |             if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
102 |                 order_single = order_single[:pre_nms_topN]
103 | 
104 |             proposals_single = proposals_single[order_single, :]
105 |             scores_single = scores_single[order_single].view(-1,1)
106 | 
107 |             # 6. apply nms (e.g. threshold = 0.7)
108 |             # 7. take after_nms_topN (e.g. 300)
109 |             # 8. return the top proposals (-> RoIs top)
110 | 
111 |             keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
112 |             keep_idx_i = keep_idx_i.long().view(-1)
113 | 
114 |             if post_nms_topN > 0:
115 |                 keep_idx_i = keep_idx_i[:post_nms_topN]
116 |             proposals_single = proposals_single[keep_idx_i, :]
117 |             scores_single = scores_single[keep_idx_i, :]
118 | 
119 |             # padding 0 at the end.
120 |             num_proposal = proposals_single.size(0)
121 |             output[i,:,0] = i
122 |             output[i,:num_proposal,1:] = proposals_single
123 | 
124 |         return output
125 | 
126 |     def backward(self, top, propagate_down, bottom):
127 |         """This layer does not propagate gradients."""
128 |         pass
129 | 
130 |     def reshape(self, bottom, top):
131 |         """Reshaping happens during the call to forward."""
132 |         pass
133 | 
134 |     def _filter_boxes(self, boxes, min_size):
135 |         """Remove all boxes with any side smaller than min_size."""
136 |         ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
137 |         hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
138 |         keep = ((ws >= min_size) & (hs >= min_size))
139 |         return keep
140 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include <stdbool.h>
  9 | #include <stdio.h>
 10 | #include <vector>
 11 | #include <iostream>
 12 | #include "nms_cuda_kernel.h"
 13 | 
 14 | #define CUDA_WARN(XXX) \
 15 |     do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
 16 |         cudaGetErrorString(XXX) << ", at line " << __LINE__ \
 17 | << std::endl; cudaDeviceSynchronize(); } while (0)
 18 | 
 19 | #define CUDA_CHECK(condition) \
 20 |   /* Code block avoids redefinition of cudaError_t error */ \
 21 |   do { \
 22 |     cudaError_t error = condition; \
 23 |     if (error != cudaSuccess) { \
 24 |       std::cout << cudaGetErrorString(error) << std::endl; \
 25 |     } \
 26 |   } while (0)
 27 | 
// Integer division of m by n, plus one when there is a positive remainder
// (ceiling division for the non-negative sizes it is used with in this file).
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// sizeof(unsigned long long) * 8: used below both as the thread count per
// CUDA block and as the number of boxes tracked by one mask word.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 30 | 
 31 | __device__ inline float devIoU(float const * const a, float const * const b) {
 32 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 33 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 34 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 35 |   float interS = width * height;
 36 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 37 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 38 |   return interS / (Sa + Sb - interS);
 39 | }
 40 | 
 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
 42 |                            float *dev_boxes, unsigned long long *dev_mask) {
 43 |   const int row_start = blockIdx.y;
 44 |   const int col_start = blockIdx.x;
 45 | 
 46 |   // if (row_start > col_start) return;
 47 | 
 48 |   const int row_size =
 49 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 50 |   const int col_size =
 51 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 52 | 
 53 |   __shared__ float block_boxes[threadsPerBlock * 5];
 54 |   if (threadIdx.x < col_size) {
 55 |     block_boxes[threadIdx.x * 5 + 0] =
 56 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 57 |     block_boxes[threadIdx.x * 5 + 1] =
 58 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 59 |     block_boxes[threadIdx.x * 5 + 2] =
 60 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 61 |     block_boxes[threadIdx.x * 5 + 3] =
 62 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 63 |     block_boxes[threadIdx.x * 5 + 4] =
 64 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 65 |   }
 66 |   __syncthreads();
 67 | 
 68 |   if (threadIdx.x < row_size) {
 69 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 70 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 71 |     int i = 0;
 72 |     unsigned long long t = 0;
 73 |     int start = 0;
 74 |     if (row_start == col_start) {
 75 |       start = threadIdx.x + 1;
 76 |     }
 77 |     for (i = start; i < col_size; i++) {
 78 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 79 |         t |= 1ULL << i;
 80 |       }
 81 |     }
 82 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 83 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 84 |   }
 85 | }
 86 | 
 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 88 |           int boxes_dim, float nms_overlap_thresh) {
 89 | 
 90 |   float* boxes_dev = NULL;
 91 |   unsigned long long* mask_dev = NULL;
 92 | 
 93 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 94 | 
 95 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
 96 |                         boxes_num * boxes_dim * sizeof(float)));
 97 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
 98 |                         boxes_host,
 99 |                         boxes_num * boxes_dim * sizeof(float),
100 |                         cudaMemcpyHostToDevice));
101 | 
102 |   CUDA_CHECK(cudaMalloc(&mask_dev,
103 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
104 | 
105 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 |               DIVUP(boxes_num, threadsPerBlock));
107 |   dim3 threads(threadsPerBlock);
108 | 
109 |   // printf("i am at line %d\n", boxes_num);
110 |   // printf("i am at line %d\n", boxes_dim);  
111 | 
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 |                                   nms_overlap_thresh,
114 |                                   boxes_dev,
115 |                                   mask_dev);
116 | 
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 |                         mask_dev,
120 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
121 |                         cudaMemcpyDeviceToHost));
122 | 
123 |   std::vector<unsigned long long> remv(col_blocks);
124 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 | 
126 |   // we need to create a memory for keep_out on cpu
127 |   // otherwise, the following code cannot run
128 | 
129 |   int* keep_out_cpu = new int[boxes_num];
130 | 
131 |   int num_to_keep = 0;
132 |   for (int i = 0; i < boxes_num; i++) {
133 |     int nblock = i / threadsPerBlock;
134 |     int inblock = i % threadsPerBlock;
135 | 
136 |     if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // orignal: keep_out[num_to_keep++] = i;
138 |       keep_out_cpu[num_to_keep++] = i;
139 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
140 |       for (int j = nblock; j < col_blocks; j++) {
141 |         remv[j] |= p[j];
142 |       }
143 |     }
144 |   }
145 | 
146 |   // copy keep_out_cpu to keep_out on gpu
147 |   CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));  
148 | 
149 |   // *num_out = num_to_keep;
150 | 
151 |   // original: *num_out = num_to_keep;
152 |   // copy num_to_keep to num_out on gpu
153 | 
154 |   CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));  
155 | 
156 |   // release cuda memory
157 |   CUDA_CHECK(cudaFree(boxes_dev));
158 |   CUDA_CHECK(cudaFree(mask_dev));
159 |   // release cpu memory
160 |   delete []keep_out_cpu;
161 | }
162 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <stdbool.h>
  3 | #include <stdio.h>
  4 | #include "roi_crop_cuda_kernel.h"
  5 | 
  6 | #define real float
  7 | 
  8 | // this symbol will be resolved automatically from PyTorch libs
  9 | extern THCState *state;
 10 | 
 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
 12 | // we assume BHWD format in inputImages
 13 | // we assume BHW(YX) format on grids
 14 | 
 15 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){
 16 | //  THCState *state = getCutorchState(L);
 17 | //  THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
 18 | //  THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
 19 | //  THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
 20 | 
 21 |   int success = 0;
 22 |   success = BilinearSamplerBHWD_updateOutput_cuda_kernel(output->size[1],
 23 |                                                output->size[3],
 24 |                                                output->size[2],
 25 |                                                output->size[0],
 26 |                                                THCudaTensor_size(state, inputImages, 1),
 27 |                                                THCudaTensor_size(state, inputImages, 2),
 28 |                                                THCudaTensor_size(state, inputImages, 3),
 29 |                                                THCudaTensor_size(state, inputImages, 0),
 30 |                                                THCudaTensor_data(state, inputImages),
 31 |                                                THCudaTensor_stride(state, inputImages, 0),
 32 |                                                THCudaTensor_stride(state, inputImages, 1),
 33 |                                                THCudaTensor_stride(state, inputImages, 2),
 34 |                                                THCudaTensor_stride(state, inputImages, 3),
 35 |                                                THCudaTensor_data(state, grids),
 36 |                                                THCudaTensor_stride(state, grids, 0),
 37 |                                                THCudaTensor_stride(state, grids, 3),
 38 |                                                THCudaTensor_stride(state, grids, 1),
 39 |                                                THCudaTensor_stride(state, grids, 2),
 40 |                                                THCudaTensor_data(state, output),
 41 |                                                THCudaTensor_stride(state, output, 0),
 42 |                                                THCudaTensor_stride(state, output, 1),
 43 |                                                THCudaTensor_stride(state, output, 2),
 44 |                                                THCudaTensor_stride(state, output, 3),
 45 |                                                THCState_getCurrentStream(state));
 46 | 
 47 |   //check for errors
 48 |   if (!success) {
 49 |     THError("aborting");
 50 |   }
 51 |   return 1;
 52 | }
 53 | 
 54 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
 55 |                                         THCudaTensor *gradGrids, THCudaTensor *gradOutput)
 56 | {
 57 | //  THCState *state = getCutorchState(L);
 58 | //  THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
 59 | //  THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
 60 | //  THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
 61 | //  THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor");
 62 | //  THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor");
 63 | 
 64 |   int success = 0;
 65 |   success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(gradOutput->size[1],
 66 |                                                   gradOutput->size[3],
 67 |                                                   gradOutput->size[2],
 68 |                                                   gradOutput->size[0],
 69 |                                                   THCudaTensor_size(state, inputImages, 1),
 70 |                                                   THCudaTensor_size(state, inputImages, 2),
 71 |                                                   THCudaTensor_size(state, inputImages, 3),
 72 |                                                   THCudaTensor_size(state, inputImages, 0),
 73 |                                                   THCudaTensor_data(state, inputImages),
 74 |                                                   THCudaTensor_stride(state, inputImages, 0),
 75 |                                                   THCudaTensor_stride(state, inputImages, 1),
 76 |                                                   THCudaTensor_stride(state, inputImages, 2),
 77 |                                                   THCudaTensor_stride(state, inputImages, 3),
 78 |                                                   THCudaTensor_data(state, grids),
 79 |                                                   THCudaTensor_stride(state, grids, 0),
 80 |                                                   THCudaTensor_stride(state, grids, 3),
 81 |                                                   THCudaTensor_stride(state, grids, 1),
 82 |                                                   THCudaTensor_stride(state, grids, 2),
 83 |                                                   THCudaTensor_data(state, gradInputImages),
 84 |                                                   THCudaTensor_stride(state, gradInputImages, 0),
 85 |                                                   THCudaTensor_stride(state, gradInputImages, 1),
 86 |                                                   THCudaTensor_stride(state, gradInputImages, 2),
 87 |                                                   THCudaTensor_stride(state, gradInputImages, 3),
 88 |                                                   THCudaTensor_data(state, gradGrids),
 89 |                                                   THCudaTensor_stride(state, gradGrids, 0),
 90 |                                                   THCudaTensor_stride(state, gradGrids, 3),
 91 |                                                   THCudaTensor_stride(state, gradGrids, 1),
 92 |                                                   THCudaTensor_stride(state, gradGrids, 2),
 93 |                                                   THCudaTensor_data(state, gradOutput),
 94 |                                                   THCudaTensor_stride(state, gradOutput, 0),
 95 |                                                   THCudaTensor_stride(state, gradOutput, 1),
 96 |                                                   THCudaTensor_stride(state, gradOutput, 2),
 97 |                                                   THCudaTensor_stride(state, gradOutput, 3),
 98 |                                                   THCState_getCurrentStream(state));
 99 | 
100 |   //check for errors
101 |   if (!success) {
102 |     THError("aborting");
103 |   }
104 |   return 1;
105 | }
106 | 


--------------------------------------------------------------------------------
/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import xml.etree.ElementTree as ET
 11 | import os
 12 | import pickle
 13 | import numpy as np
 14 | 
 15 | def parse_rec(filename):
 16 |   """ Parse a PASCAL VOC xml file """
 17 |   tree = ET.parse(filename)
 18 |   objects = []
 19 |   for obj in tree.findall('object'):
 20 |     obj_struct = {}
 21 |     obj_struct['name'] = obj.find('name').text
 22 |     obj_struct['pose'] = obj.find('pose').text
 23 |     obj_struct['truncated'] = int(obj.find('truncated').text)
 24 |     obj_struct['difficult'] = int(obj.find('difficult').text)
 25 |     bbox = obj.find('bndbox')
 26 |     obj_struct['bbox'] = [int(bbox.find('xmin').text),
 27 |                           int(bbox.find('ymin').text),
 28 |                           int(bbox.find('xmax').text),
 29 |                           int(bbox.find('ymax').text)]
 30 |     objects.append(obj_struct)
 31 | 
 32 |   return objects
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |   """ ap = voc_ap(rec, prec, [use_07_metric])
 37 |   Compute VOC AP given precision and recall.
 38 |   If use_07_metric is true, uses the
 39 |   VOC 07 11 point method (default:False).
 40 |   """
 41 |   if use_07_metric:
 42 |     # 11 point metric
 43 |     ap = 0.
 44 |     for t in np.arange(0., 1.1, 0.1):
 45 |       if np.sum(rec >= t) == 0:
 46 |         p = 0
 47 |       else:
 48 |         p = np.max(prec[rec >= t])
 49 |       ap = ap + p / 11.
 50 |   else:
 51 |     # correct AP calculation
 52 |     # first append sentinel values at the end
 53 |     mrec = np.concatenate(([0.], rec, [1.]))
 54 |     mpre = np.concatenate(([0.], prec, [0.]))
 55 | 
 56 |     # compute the precision envelope
 57 |     for i in range(mpre.size - 1, 0, -1):
 58 |       mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 59 | 
 60 |     # to calculate area under PR curve, look for points
 61 |     # where X axis (recall) changes value
 62 |     i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |     # and sum (\Delta recall) * prec
 65 |     ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |   return ap
 67 | 
 68 | 
 69 | def voc_eval(detpath,
 70 |              annopath,
 71 |              imagesetfile,
 72 |              classname,
 73 |              cachedir,
 74 |              ovthresh=0.5,
 75 |              use_07_metric=False):
 76 |   """rec, prec, ap = voc_eval(detpath,
 77 |                               annopath,
 78 |                               imagesetfile,
 79 |                               classname,
 80 |                               [ovthresh],
 81 |                               [use_07_metric])
 82 | 
 83 |   Top level function that does the PASCAL VOC evaluation.
 84 | 
 85 |   detpath: Path to detections
 86 |       detpath.format(classname) should produce the detection results file.
 87 |   annopath: Path to annotations
 88 |       annopath.format(imagename) should be the xml annotations file.
 89 |   imagesetfile: Text file containing the list of images, one image per line.
 90 |   classname: Category name (duh)
 91 |   cachedir: Directory for caching the annotations
 92 |   [ovthresh]: Overlap threshold (default = 0.5)
 93 |   [use_07_metric]: Whether to use VOC07's 11 point AP computation
 94 |       (default False)
 95 |   """
 96 |   # assumes detections are in detpath.format(classname)
 97 |   # assumes annotations are in annopath.format(imagename)
 98 |   # assumes imagesetfile is a text file with each line an image name
 99 |   # cachedir caches the annotations in a pickle file
100 | 
101 |   # first load gt
102 |   if not os.path.isdir(cachedir):
103 |     os.mkdir(cachedir)
104 |   cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
105 |   # read list of images
106 |   with open(imagesetfile, 'r') as f:
107 |     lines = f.readlines()
108 |   imagenames = [x.strip() for x in lines]
109 | 
110 |   if not os.path.isfile(cachefile):
111 |     # load annotations
112 |     recs = {}
113 |     for i, imagename in enumerate(imagenames):
114 |       recs[imagename] = parse_rec(annopath.format(imagename))
115 |       if i % 100 == 0:
116 |         print('Reading annotation for {:d}/{:d}'.format(
117 |           i + 1, len(imagenames)))
118 |     # save
119 |     print('Saving cached annotations to {:s}'.format(cachefile))
120 |     with open(cachefile, 'wb') as f:
121 |       pickle.dump(recs, f)
122 |   else:
123 |     # load
124 |     with open(cachefile, 'rb') as f:
125 |       try:
126 |         recs = pickle.load(f)
127 |       except:
128 |         recs = pickle.load(f, encoding='bytes')
129 | 
130 |   # extract gt objects for this class
131 |   class_recs = {}
132 |   npos = 0
133 |   for imagename in imagenames:
134 |     R = [obj for obj in recs[imagename] if obj['name'] == classname]
135 |     bbox = np.array([x['bbox'] for x in R])
136 |     difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
137 |     det = [False] * len(R)
138 |     npos = npos + sum(~difficult)
139 |     class_recs[imagename] = {'bbox': bbox,
140 |                              'difficult': difficult,
141 |                              'det': det}
142 | 
143 |   # read dets
144 |   detfile = detpath.format(classname)
145 |   with open(detfile, 'r') as f:
146 |     lines = f.readlines()
147 | 
148 |   splitlines = [x.strip().split(' ') for x in lines]
149 |   image_ids = [x[0] for x in splitlines]
150 |   confidence = np.array([float(x[1]) for x in splitlines])
151 |   BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
152 | 
153 |   nd = len(image_ids)
154 |   tp = np.zeros(nd)
155 |   fp = np.zeros(nd)
156 | 
157 |   if BB.shape[0] > 0:
158 |     # sort by confidence
159 |     sorted_ind = np.argsort(-confidence)
160 |     sorted_scores = np.sort(-confidence)
161 |     BB = BB[sorted_ind, :]
162 |     image_ids = [image_ids[x] for x in sorted_ind]
163 | 
164 |     # go down dets and mark TPs and FPs
165 |     for d in range(nd):
166 |       R = class_recs[image_ids[d]]
167 |       bb = BB[d, :].astype(float)
168 |       ovmax = -np.inf
169 |       BBGT = R['bbox'].astype(float)
170 | 
171 |       if BBGT.size > 0:
172 |         # compute overlaps
173 |         # intersection
174 |         ixmin = np.maximum(BBGT[:, 0], bb[0])
175 |         iymin = np.maximum(BBGT[:, 1], bb[1])
176 |         ixmax = np.minimum(BBGT[:, 2], bb[2])
177 |         iymax = np.minimum(BBGT[:, 3], bb[3])
178 |         iw = np.maximum(ixmax - ixmin + 1., 0.)
179 |         ih = np.maximum(iymax - iymin + 1., 0.)
180 |         inters = iw * ih
181 | 
182 |         # union
183 |         uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
184 |                (BBGT[:, 2] - BBGT[:, 0] + 1.) *
185 |                (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
186 | 
187 |         overlaps = inters / uni
188 |         ovmax = np.max(overlaps)
189 |         jmax = np.argmax(overlaps)
190 | 
191 |       if ovmax > ovthresh:
192 |         if not R['difficult'][jmax]:
193 |           if not R['det'][jmax]:
194 |             tp[d] = 1.
195 |             R['det'][jmax] = 1
196 |           else:
197 |             fp[d] = 1.
198 |       else:
199 |         fp[d] = 1.
200 | 
201 |   # compute precision recall
202 |   fp = np.cumsum(fp)
203 |   tp = np.cumsum(tp)
204 |   rec = tp / float(npos)
205 |   # avoid divide by zero in case the first detection matches a difficult
206 |   # ground truth
207 |   prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
208 |   ap = voc_ap(rec, prec, use_07_metric)
209 | 
210 |   return rec, prec, ap
211 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | An implementation of [DetNet: A Backbone network for Object Detection](https://arxiv.org/abs/1804.06215). Due to limited time, I only trained and tested on the PASCAL VOC dataset. The results show that the performance of detnet59 is indeed better than that of FPN101.
  2 | 
  3 | ## Introduction
  4 | 
  5 | First, I spent about one week training detnet59 on the ImageNet dataset. The classification performance of detnet59 is slightly better than that of the original resnet50. Then I used the pretrained detnet59 to train and test on PASCAL VOC.
  6 | 
  7 | Based on [**FPN_Pytorch**](https://github.com/guoruoqian/FPN_Pytorch/), I changed the backbone from FPN101 to detnet59.
  8 | 
  9 | **Update 2019/01/01**
 10 | 
 11 | **Fixed bugs in demo.py. Now you can run demo.py**. **Note that the default demo.py only supports the pascal_voc categories.**  You need to change the ```pascal_classes``` in demo.py to adapt it to your own dataset. If you want to know more details, please see the **Usage** section.
 12 | 
 13 | **Update 2018/8/21**
 14 | 
 15 | **train and test on COCO2017 !**
 16 | 
 17 | **Update**
 18 | 
 19 | **Adding soft_nms.**  **Without requiring any re-training of existing models.** You only need to use soft_nms during testing to bring performance improvements. 
 20 | 
 21 | ## Benchmarking
 22 | 
 23 | I benchmark this code thoroughly on pascal voc2007 and 07+12. Below are the results:
 24 | 
 25 | 0). ImageNet(test on validation dataset)
 26 | 
 27 | | backbone                       | Top1 error |
 28 | | ------------------------------ | ---------- |
 29 | | pytorch resnet50               | 23.9       |
 30 | | detnet59 in this code          | 23.8       |
 31 | | detnet59 in the original paper | 23.5       |
 32 | 
 33 | 1). PASCAL VOC 2007 (Train/Test: 07trainval/07test, scale=600, ROI Align)
 34 | 
 35 | | model（FPN）                                                 | GPUs            | Batch Size | lr   | lr_decay | max_epoch | Speed/epoch | Memory/GPU | mAP  |
 36 | | ------------------------------------------------------------ | --------------- | ---------- | ---- | -------- | --------- | ----------- | ---------- | ---- |
 37 | | ResNet-101                                                   | 1 GTX 1080 (Ti) | 2          | 1e-3 | 10       | 12        | 1.44hr      | 6137MB     | 75.7 |
 38 | | [DetNet59](https://www.dropbox.com/home/DetNet/PASCAL%20VOC%202007?preview=fpn_1_8_5010.pth) | 1 GTX 1080 (Ti) | 2          | 1e-3 | 10       | 12        | 1.07hr      | 5412MB     | 75.9 |
 39 | 
 40 | 2). PASCAL VOC 07+12 (Train/Test: 07+12trainval/07test, scale=600, ROI Align)
 41 | 
 42 | | model（FPN）                                                 | GPUs            | Batch Size | lr   | lr_decay | max_epoch | Speed/epoch | Memory/GPU | mAP  |
 43 | | ------------------------------------------------------------ | --------------- | ---------- | ---- | -------- | --------- | ----------- | ---------- | ---- |
 44 | | ResNet-101                                                   | 1 GTX 1080 (Ti) | 1          | 1e-3 | 10       | 12        | 3.96hr      | 9011MB     | 80.5 |
 45 | | [DetNet59](https://www.dropbox.com/home/DetNet/PASCAL%20VOC%2007%2B12?preview=fpn_1_7_33101.pth) | 1 GTX 1080 (Ti) | 1          | 1e-3 | 10       | 12        | 2.33hr      | 8015MB     | 80.7 |
 46 | | ResNet-101(**using soft_nms when testing**)                  | 1 GTX 1080 (Ti) | \          | \    | \        | \         | \           | \          | 81.2 |
 47 | | DetNet59(**using soft_nms when testing**)                    | 1 GTX 1080 (Ti) | \          | \    | \        | \         | \           | \          | 81.6 |
 48 | 
 49 | 3). COCO2017 (Train/Test:COCO2017train/COCO2017val, scale=800, max_size=1200，ROI Align)
 50 | 
 51 | | model                                                        | #GPUs | batch size | lr   | lr_decay | max_epoch | time/epoch | mem/GPU | mAP  |
 52 | | ------------------------------------------------------------ | ----- | ---------- | ---- | -------- | --------- | ---------- | ------- | ---- |
 53 | | [DetNet59](https://1drv.ms/u/s!AiHdFv3GrokYhU2WdxmwcK9RK_gW) | 2     | 4          | 4e-3 | 4        | 11        | \          | 9000    | 36.0 |
 54 | 
 55 | ## Preparation
 56 | 
 57 | First of all, clone the code
 58 | 
 59 | ```
 60 | git clone https://github.com/guoruoqian/DetNet_Pytorch.git
 61 | ```
 62 | 
 63 | Then, create a folder:
 64 | 
 65 | ```shell
 66 | cd DetNet_Pytorch && mkdir data
 67 | ```
 68 | 
 69 | ### prerequisites
 70 | 
 71 | - Python 2.7 or 3.6
 72 | - PyTorch 0.2.0 or higher (PyTorch >= 0.4.0 is not supported)
 73 | - CUDA 8.0 or higher
 74 | - tensorboardX
 75 | 
 76 | ### Data Preparation
 77 | 
 78 | - VOC2007: Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models) to prepare the VOC datasets. You can also refer to any other guide. After downloading the data, create softlinks in the folder data/.
 79 | - VOC 07 + 12: Please follow the instructions in [YuwenXiong/py-R-FCN](https://github.com/YuwenXiong/py-R-FCN/blob/master/README.md#preparation-for-training--testing) . **I think this instruction is more helpful to prepare VOC datasets.**
 80 | 
 81 | ### Pretrained Model 
 82 | 
 83 |  You can download the detnet59 model which i trained on ImageNet from:
 84 | 
 85 | - detnet59: [dropbox](https://www.dropbox.com/home/DetNet?preview=detnet59.pth)，[baiduyun](https://pan.baidu.com/s/14_ztsAKcrZGb4nnm8aCMyQ)
 86 | 
 87 |  Download it and put it into the data/pretrained_model/. 
 88 | 
 89 | ### Compilation
 90 | 
 91 | As pointed out by [ruotianluo/pytorch-faster-rcnn](https://github.com/ruotianluo/pytorch-faster-rcnn), choose the right `-arch` in `make.sh` file, to compile the cuda code: 
 92 | 
 93 | | GPU model                  | Architecture |
 94 | | :------------------------- | :----------: |
 95 | | TitanX (Maxwell/Pascal)    |    sm_52     |
 96 | | GTX 960M                   |    sm_50     |
 97 | | GTX 1080 (Ti)              |    sm_61     |
 98 | | Grid K520 (AWS g2.2xlarge) |    sm_30     |
 99 | | Tesla K80 (AWS p2.xlarge)  |    sm_37     |
100 | 
101 | Install all the python dependencies using pip: 
102 | 
103 | ```shell
104 | pip install -r requirements.txt
105 | ```
106 | 
107 | Compile the cuda dependencies using following simple commands: 
108 | 
109 | ```shell
110 | cd lib
111 | sh make.sh
112 | ```
113 | 
114 | It will compile all the modules you need, including NMS, ROI_Pooling, ROI_Align and ROI_Crop. The default version is compiled with Python 2.7; please compile it yourself if you are using a different Python version. 
115 | 
116 | ## Usage
117 | 
118 | train voc2007:
119 | 
120 | ```shell
121 | CUDA_VISIBLE_DEVICES=3 python3 trainval_net.py exp_name --dataset pascal_voc --net detnet59 --bs 2 --nw 4 --lr 1e-3 --epochs 12 --save_dir weights --cuda --use_tfboard True
122 | ```
123 | 
124 | test voc2007:
125 | 
126 | ```shell
127 | CUDA_VISIBLE_DEVICES=3 python3 test_net.py exp_name --dataset pascal_voc --net detnet59 --checksession 1 --checkepoch 7 --checkpoint 5010 --cuda --load_dir weights
128 | ```
129 | 
130 | run demo.py :
131 | 
132 | Before running the demo, you must create a directory named 'demo_images' and put images (VOC images) in it. You can download the pretrained models listed in the tables above.  
133 | 
134 | ```shell
135 | CUDA_VISIBLE_DEVICES=0 python3 demo.py exp_name --dataset pascal_voc --net detnet59 --checksession 1 --checkepoch 7 --checkpoint 5010 --cuda --load_dir weights --image_dir demo_images --result_dir vis_results
136 | ```
137 | 
138 | **using soft_nms when testing**:
139 | 
140 | ```shell
141 | CUDA_VISIBLE_DEVICES=3 python3 test_net.py exp_name --dataset pascal_voc --net detnet59 --checksession 1 --checkepoch 7 --checkpoint 5010 --cuda --load_dir weights --soft_nms
142 | ```
143 | 
144 | Before training on voc07+12, you must set ASPECT_CROPPING to False in detnet59.yml; otherwise you will encounter errors during training. 
145 | 
146 | train voc07+12:
147 | 
148 | ```shell
149 | CUDA_VISIBLE_DEVICES=3 python3 trainval_net.py exp_name2 --dataset pascal_voc_0712 --net detnet59 --bs 1 --nw 4 --lr 1e-3 --epochs 12 --save_dir weights --cuda --use_tfboard True
150 | ```
151 | train coco:
152 | 
153 | ```shell
154 | CUDA_VISIBLE_DEVICES=6,7 python3 trainval_net.py detnetv1.0 --dataset coco --net detnet59 --bs 4 --nw 4 --lr 4e-3 --epochs 12 --save_dir weights --cuda --lscale --mGPUs
155 | ```
156 | 
157 | test coco:
158 | 
159 | ```shell
160 | CUDA_VISIBLE_DEVICES=2 python3 test_net.py detnetv1.0 --dataset coco --net detnet59 --checksession 1 --checkepoch 7 --checkpoint 58632 --cuda --load_dir weights --ls
161 | ```
162 | 
163 | ### TODO
164 | 
165 | - Train and test on COCO(**Done**)


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "roi_align_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |             i += blockDim.x * gridDim.x)
 13 | 
 14 | 
 15 |     __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
 16 |                                     const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
 17 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 18 |             // (n, c, ph, pw) is an element in the aligned output
 19 |             // int n = index;
 20 |             // int pw = n % aligned_width;
 21 |             // n /= aligned_width;
 22 |             // int ph = n % aligned_height;
 23 |             // n /= aligned_height;
 24 |             // int c = n % channels;
 25 |             // n /= channels;
 26 | 
 27 |             int pw = index % aligned_width;
 28 |             int ph = (index / aligned_width) % aligned_height;
 29 |             int c  = (index / aligned_width / aligned_height) % channels;
 30 |             int n  = index / aligned_width / aligned_height / channels;
 31 | 
 32 |             // bottom_rois += n * 5;
 33 |             float roi_batch_ind = bottom_rois[n * 5 + 0];
 34 |             float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
 35 |             float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
 36 |             float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
 37 |             float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
 38 | 
 39 |             // Force malformed ROIs to be 1x1
 40 |             float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
 41 |             float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
 42 |             float bin_size_h = roi_height / (aligned_height - 1.);
 43 |             float bin_size_w = roi_width / (aligned_width - 1.);
 44 | 
 45 |             float h = (float)(ph) * bin_size_h + roi_start_h;
 46 |             float w = (float)(pw) * bin_size_w + roi_start_w;
 47 | 
 48 |             int hstart = fminf(floor(h), height - 2);
 49 |             int wstart = fminf(floor(w), width - 2);
 50 | 
 51 |             int img_start = roi_batch_ind * channels * height * width;
 52 | 
 53 |             // bilinear interpolation
 54 |             if (h < 0 || h >= height || w < 0 || w >= width) {
 55 |                 top_data[index] = 0.;
 56 |             } else {
 57 |                 float h_ratio = h - (float)(hstart);
 58 |                 float w_ratio = w - (float)(wstart);
 59 |                 int upleft = img_start + (c * height + hstart) * width + wstart;
 60 |                 int upright = upleft + 1;
 61 |                 int downleft = upleft + width;
 62 |                 int downright = downleft + 1;
 63 | 
 64 |                 top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
 65 |                     + bottom_data[upright] * (1. - h_ratio) * w_ratio
 66 |                     + bottom_data[downleft] * h_ratio * (1. - w_ratio)
 67 |                     + bottom_data[downright] * h_ratio * w_ratio;
 68 |             }
 69 |         }
 70 |     }
 71 | 
 72 | 
 73 |     int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
 74 |                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
 75 |         const int kThreadsPerBlock = 1024;
 76 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
 77 |         cudaError_t err;
 78 | 
 79 | 
 80 |         ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 81 |           output_size, bottom_data, spatial_scale, height, width, channels,
 82 |           aligned_height, aligned_width, bottom_rois, top_data);
 83 | 
 84 |         err = cudaGetLastError();
 85 |         if(cudaSuccess != err) {
 86 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
 87 |             exit( -1 );
 88 |         }
 89 | 
 90 |         return 1;
 91 |     }
 92 | 
 93 | 
 94 |     __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
 95 |                                      const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
 96 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 97 | 
 98 |             // (n, c, ph, pw) is an element in the aligned output
 99 |             int pw = index % aligned_width;
100 |             int ph = (index / aligned_width) % aligned_height;
101 |             int c  = (index / aligned_width / aligned_height) % channels;
102 |             int n  = index / aligned_width / aligned_height / channels;
103 | 
104 |             float roi_batch_ind = bottom_rois[n * 5 + 0];
105 |             float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
106 |             float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
107 |             float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
108 |             float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
109 |             /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */
110 |             /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */
111 |             /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */
112 |             /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */
113 | 
114 |             // Force malformed ROIs to be 1x1
115 |             float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
116 |             float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
117 |             float bin_size_h = roi_height / (aligned_height - 1.);
118 |             float bin_size_w = roi_width / (aligned_width - 1.);
119 | 
120 |             float h = (float)(ph) * bin_size_h + roi_start_h;
121 |             float w = (float)(pw) * bin_size_w + roi_start_w;
122 | 
123 |             int hstart = fminf(floor(h), height - 2);
124 |             int wstart = fminf(floor(w), width - 2);
125 | 
126 |             int img_start = roi_batch_ind * channels * height * width;
127 | 
128 |             // bilinear interpolation
129 |             if (!(h < 0 || h >= height || w < 0 || w >= width)) {
130 |                 float h_ratio = h - (float)(hstart);
131 |                 float w_ratio = w - (float)(wstart);
132 |                 int upleft = img_start + (c * height + hstart) * width + wstart;
133 |                 int upright = upleft + 1;
134 |                 int downleft = upleft + width;
135 |                 int downright = downleft + 1;
136 | 
137 |                 atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
138 |                 atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
139 |                 atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
140 |                 atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
141 |             }
142 |         }
143 |     }
144 | 
145 |     int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 |                                 const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 |         const int kThreadsPerBlock = 1024;
148 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
149 |         cudaError_t err;
150 | 
151 |         ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 |           output_size, top_diff, spatial_scale, height, width, channels,
153 |           aligned_height, aligned_width, bottom_diff, bottom_rois);
154 | 
155 |         err = cudaGetLastError();
156 |         if(cudaSuccess != err) {
157 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 |             exit( -1 );
159 |         }
160 | 
161 |         return 1;
162 |     }
163 | 
164 | 
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 | 


--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | 
  8 | import numpy as np
  9 | import pdb
 10 | 
 11 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 12 | #
 13 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
 14 | #    >> anchors
 15 | #
 16 | #    anchors =
 17 | #
 18 | #       -83   -39   100    56
 19 | #      -175   -87   192   104
 20 | #      -359  -183   376   200
 21 | #       -55   -55    72    72
 22 | #      -119  -119   136   136
 23 | #      -247  -247   264   264
 24 | #       -35   -79    52    96
 25 | #       -79  -167    96   184
 26 | #      -167  -343   184   360
 27 | 
 28 | #array([[ -83.,  -39.,  100.,   56.],
 29 | #       [-175.,  -87.,  192.,  104.],
 30 | #       [-359., -183.,  376.,  200.],
 31 | #       [ -55.,  -55.,   72.,   72.],
 32 | #       [-119., -119.,  136.,  136.],
 33 | #       [-247., -247.,  264.,  264.],
 34 | #       [ -35.,  -79.,   52.,   96.],
 35 | #       [ -79., -167.,   96.,  184.],
 36 | #       [-167., -343.,  184.,  360.]])
 37 | 
 38 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
 39 |                      scales=2**np.arange(3, 6)):
 40 |     """
 41 |     Generate anchor (reference) windows by enumerating aspect ratios X
 42 |     scales wrt a reference (0, 0, 15, 15) window.
 43 |     """
 44 | 
 45 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
 46 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
 47 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
 48 |                          for i in xrange(ratio_anchors.shape[0])])
 49 |     return anchors
 50 |     
 51 | def _whctrs(anchor):
 52 |     """
 53 |     Return width, height, x center, and y center for an anchor (window).
 54 |     """
 55 | 
 56 |     w = anchor[2] - anchor[0] + 1
 57 |     h = anchor[3] - anchor[1] + 1
 58 |     x_ctr = anchor[0] + 0.5 * (w - 1)
 59 |     y_ctr = anchor[1] + 0.5 * (h - 1)
 60 |     return w, h, x_ctr, y_ctr
 61 | 
 62 | def _mkanchors(ws, hs, x_ctr, y_ctr):
 63 |     """
 64 |     Given a vector of widths (ws) and heights (hs) around a center
 65 |     (x_ctr, y_ctr), output a set of anchors (windows).
 66 |     """
 67 | 
 68 |     ws = ws[:, np.newaxis]
 69 |     hs = hs[:, np.newaxis]
 70 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
 71 |                          y_ctr - 0.5 * (hs - 1),
 72 |                          x_ctr + 0.5 * (ws - 1),
 73 |                          y_ctr + 0.5 * (hs - 1)))
 74 |     return anchors
 75 | 
 76 | def _ratio_enum(anchor, ratios):
 77 |     """
 78 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
 79 |     """
 80 | 
 81 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 82 |     size = w * h
 83 |     size_ratios = size / ratios
 84 |     ws = np.round(np.sqrt(size_ratios))
 85 |     hs = np.round(ws * ratios)
 86 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 87 |     return anchors
 88 | 
 89 | def _scale_enum(anchor, scales):
 90 |     """
 91 |     Enumerate a set of anchors for each scale wrt an anchor.
 92 |     """
 93 | 
 94 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 95 |     ws = w * scales
 96 |     hs = h * scales
 97 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 98 |     return anchors
 99 | 
# Manual check when the file is run as a script: time one call to
# generate_anchors(), print the elapsed seconds and the anchors, then open an
# interactive IPython shell.
# NOTE: this guard sits above the pyramid-anchor functions defined further
# down, so those definitions have not been executed yet when the shell opens.
if __name__ == '__main__':
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed; embed()
107 | 
108 | ############################################################
109 | #  Anchors
110 | ############################################################
111 | 
def generate_anchors_single_pyramid(scales, ratios, shape, feature_stride, anchor_stride):
    """
    Generate anchors for one feature-map level.

    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
            to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.

    Returns an array with one row (x1, y1, x2, y2) per anchor. Rows are
    ordered by grid position (row-major), and within a position by the
    scale/ratio combination.
    """
    # Every (scale, ratio) combination, flattened to 1-D.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    flat_scales = scale_grid.flatten()
    sqrt_ratios = np.sqrt(ratio_grid.flatten())

    # ratio = width / height, so height shrinks and width grows with sqrt(ratio).
    heights = flat_scales / sqrt_ratios
    widths = flat_scales * sqrt_ratios

    # Anchor centers: sampled feature-map cells mapped to image pixels.
    ys = np.arange(0, shape[0], anchor_stride) * feature_stride
    xs = np.arange(0, shape[1], anchor_stride) * feature_stride
    grid_x, grid_y = np.meshgrid(xs, ys)

    # Pair every center with every box size.
    box_widths, box_centers_x = np.meshgrid(widths, grid_x)
    box_heights, box_centers_y = np.meshgrid(heights, grid_y)

    # NOTE: the original order is (y, x), we changed it to (x, y) for our code
    # Lists of (x, y) centers and (w, h) sizes, one row per anchor.
    centers = np.stack([box_centers_x, box_centers_y], axis=2).reshape([-1, 2])
    sizes = np.stack([box_widths, box_heights], axis=2).reshape([-1, 2])

    # Convert to corner coordinates (x1, y1, x2, y2)
    half_sizes = 0.5 * sizes
    return np.concatenate([centers - half_sizes, centers + half_sizes], axis=1)
155 | 
156 | 
def generate_anchors_all_pyramids(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors at different levels of a feature pyramid. Each scale
    is associated with a level of the pyramid, but each ratio is used in
    all levels of the pyramid.
    Returns:
    anchors: [N, 4]. All generated anchors in one array, one row per anchor
        as produced by generate_anchors_single_pyramid, whose rows are
        (x1, y1, x2, y2). Sorted with the same order of the given scales.
        So, anchors of scale[0] come first, then anchors of scale[1], and
        so on.
    """
    # One anchor array per pyramid level; level i uses scales[i],
    # feature_shapes[i] and feature_strides[i].
    per_level = [
        generate_anchors_single_pyramid(scale, ratios, feature_shapes[level],
                                        feature_strides[level], anchor_stride)
        for level, scale in enumerate(scales)
    ]
    return np.concatenate(per_level, axis=0)
174 | 
175 | 
176 | # def generate_anchors_single_pyramid(scales, ratios):
177 | #     """
178 | #     scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
179 | #     ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
180 | #     """
181 | 
182 | #     # Get all combinations of scales and ratios
183 | #     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
184 | #     scales = scales.flatten()
185 | #     ratios = ratios.flatten()
186 | 
187 | #     # Enumerate heights and widths from scales and ratios
188 | #     heights = scales / np.sqrt(ratios)
189 | #     widths = scales * np.sqrt(ratios)
190 | 
191 | #     # Reshape to get a list of (y, x) and a list of (h, w)
192 | #     box_sizes = np.stack([heights, widths], axis=1).reshape([-1, 2])
193 | 
194 | #     return box_sizes
195 | 
196 | 
197 | # def generate_anchors_all_pyramids(scales, ratios):
198 | #     """Generate anchors at different levels of a feature pyramid. Each scale
199 | #     is associated with a level of the pyramid, but each ratio is used in
200 | #     all levels of the pyramid.
201 | #     Returns:
202 | #     anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
203 | #         with the same order of the given scales. So, anchors of scale[0] come
204 | #         first, then anchors of scale[1], and so on.
205 | #     """
206 | #     # Anchors
207 | #     # [anchor_count, (y1, x1, y2, x2)]
208 | #     anchors = []
209 | #     for i in range(len(scales)):
210 | #         anchors.append(generate_anchors_single_pyramid(scales[i], ratios))
211 | #     return np.concatenate(anchors, axis=0)    


--------------------------------------------------------------------------------
/lib/datasets/imagenet.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # --------------------------------------------------------
  3 | # Fast R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick
  7 | # --------------------------------------------------------
  8 | 
  9 | import datasets
 10 | import datasets.imagenet
 11 | import os, sys
 12 | from datasets.imdb import imdb
 13 | import xml.dom.minidom as minidom
 14 | import numpy as np
 15 | import scipy.sparse
 16 | import scipy.io as sio
 17 | import pickle
 18 | import subprocess
 19 | import pdb
 20 | 
 21 | class imagenet(imdb):
 22 |     def __init__(self, image_set, devkit_path, data_path):
 23 |         imdb.__init__(self, image_set)
 24 |         self._image_set = image_set
 25 |         self._devkit_path = devkit_path
 26 |         self._data_path = data_path
 27 |         synsets_image = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_det.mat'))
 28 |         synsets_video = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_vid.mat'))
 29 |         self._classes_image = ('__background__',)
 30 |         self._wnid_image = (0,)
 31 | 
 32 |         self._classes = ('__background__',)
 33 |         self._wnid = (0,)
 34 | 
 35 |         for i in xrange(200):
 36 |             self._classes_image = self._classes_image + (synsets_image['synsets'][0][i][2][0],)
 37 |             self._wnid_image = self._wnid_image + (synsets_image['synsets'][0][i][1][0],)
 38 | 
 39 |         for i in xrange(30):
 40 |             self._classes = self._classes + (synsets_video['synsets'][0][i][2][0],)
 41 |             self._wnid = self._wnid + (synsets_video['synsets'][0][i][1][0],)
 42 | 
 43 |         self._wnid_to_ind_image = dict(zip(self._wnid_image, xrange(201)))
 44 |         self._class_to_ind_image = dict(zip(self._classes_image, xrange(201)))
 45 | 
 46 |         self._wnid_to_ind = dict(zip(self._wnid, xrange(31)))
 47 |         self._class_to_ind = dict(zip(self._classes, xrange(31)))
 48 | 
 49 |         #check for valid intersection between video and image classes
 50 |         self._valid_image_flag = [0]*201
 51 | 
 52 |         for i in range(1,201):
 53 |             if self._wnid_image[i] in self._wnid_to_ind:
 54 |                 self._valid_image_flag[i] = 1
 55 | 
 56 |         self._image_ext = ['.JPEG']
 57 | 
 58 |         self._image_index = self._load_image_set_index()
 59 |         # Default to roidb handler
 60 |         self._roidb_handler = self.gt_roidb
 61 | 
 62 |         # Specific config options
 63 |         self.config = {'cleanup'  : True,
 64 |                        'use_salt' : True,
 65 |                        'top_k'    : 2000}
 66 | 
 67 |         assert os.path.exists(self._devkit_path), 'Devkit path does not exist: {}'.format(self._devkit_path)
 68 |         assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path)
 69 | 
 70 |     def image_path_at(self, i):
 71 |         """
 72 |         Return the absolute path to image i in the image sequence.
 73 |         """
 74 |         return self.image_path_from_index(self._image_index[i])
 75 | 
 76 |     def image_path_from_index(self, index):
 77 |         """
 78 |         Construct an image path from the image's "index" identifier.
 79 |         """
 80 |         image_path = os.path.join(self._data_path, 'Data', self._image_set, index + self._image_ext[0])
 81 |         assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path)
 82 |         return image_path
 83 | 
 84 |     def _load_image_set_index(self):
 85 |         """
 86 |         Load the indexes listed in this dataset's image set file.
 87 |         """
 88 |         # Example path to image set file:
 89 |         # self._data_path + /ImageSets/val.txt
 90 | 
 91 |         if self._image_set == 'train':
 92 |             image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt')
 93 |             image_index = []
 94 |             if os.path.exists(image_set_file):
 95 |                 f = open(image_set_file, 'r')
 96 |                 data = f.read().split()
 97 |                 for lines in data:
 98 |                     if lines != '':
 99 |                         image_index.append(lines)
100 |                 f.close()
101 |                 return image_index
102 | 
103 |             for i in range(1,200):
104 |                 print(i)
105 |                 image_set_file = os.path.join(self._data_path, 'ImageSets', 'DET', 'train_' + str(i) + '.txt')
106 |                 with open(image_set_file) as f:
107 |                     tmp_index = [x.strip() for x in f.readlines()]
108 |                     vtmp_index = []
109 |                     for line in tmp_index:
110 |                         line = line.split(' ')
111 |                         image_list = os.popen('ls ' + self._data_path + '/Data/DET/train/' + line[0] + '/*.JPEG').read().split()
112 |                         tmp_list = []
113 |                         for imgs in image_list:
114 |                             tmp_list.append(imgs[:-5])
115 |                         vtmp_index = vtmp_index + tmp_list
116 | 
117 |                 num_lines = len(vtmp_index)
118 |                 ids = np.random.permutation(num_lines)
119 |                 count = 0
120 |                 while count < 2000:
121 |                     image_index.append(vtmp_index[ids[count % num_lines]])
122 |                     count = count + 1
123 | 
124 |             for i in range(1,201):
125 |                 if self._valid_image_flag[i] == 1:
126 |                     image_set_file = os.path.join(self._data_path, 'ImageSets', 'train_pos_' + str(i) + '.txt')
127 |                     with open(image_set_file) as f:
128 |                         tmp_index = [x.strip() for x in f.readlines()]
129 |                     num_lines = len(tmp_index)
130 |                     ids = np.random.permutation(num_lines)
131 |                     count = 0
132 |                     while count < 2000:
133 |                         image_index.append(tmp_index[ids[count % num_lines]])
134 |                         count = count + 1
135 |             image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt')
136 |             f = open(image_set_file, 'w')
137 |             for lines in image_index:
138 |                 f.write(lines + '\n')
139 |             f.close()
140 |         else:
141 |             image_set_file = os.path.join(self._data_path, 'ImageSets', 'val.txt')
142 |             with open(image_set_file) as f:
143 |                 image_index = [x.strip() for x in f.readlines()]
144 |         return image_index
145 | 
146 |     def gt_roidb(self):
147 |         """
148 |         Return the database of ground-truth regions of interest.
149 |         This function loads/saves from/to a cache file to speed up future calls.
150 |         """
151 |         cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl')
152 |         if os.path.exists(cache_file):
153 |             with open(cache_file, 'rb') as fid:
154 |                 roidb = pickle.load(fid)
155 |             print('{} gt roidb loaded from {}'.format(self.name, cache_file))
156 |             return roidb
157 | 
158 |         gt_roidb = [self._load_imagenet_annotation(index)
159 |                     for index in self.image_index]
160 |         with open(cache_file, 'wb') as fid:
161 |             pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL)
162 |         print('wrote gt roidb to {}'.format(cache_file))
163 | 
164 |         return gt_roidb
165 | 
166 | 
167 |     def _load_imagenet_annotation(self, index):
168 |         """
169 |         Load image and bounding boxes info from txt files of imagenet.
170 |         """
171 |         filename = os.path.join(self._data_path, 'Annotations', self._image_set, index + '.xml')
172 | 
173 |         # print 'Loading: {}'.format(filename)
174 |         def get_data_from_tag(node, tag):
175 |             return node.getElementsByTagName(tag)[0].childNodes[0].data
176 | 
177 |         with open(filename) as f:
178 |             data = minidom.parseString(f.read())
179 | 
180 |         objs = data.getElementsByTagName('object')
181 |         num_objs = len(objs)
182 | 
183 |         boxes = np.zeros((num_objs, 4), dtype=np.uint16)
184 |         gt_classes = np.zeros((num_objs), dtype=np.int32)
185 |         overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
186 | 
187 |         # Load object bounding boxes into a data frame.
188 |         for ix, obj in enumerate(objs):
189 |             x1 = float(get_data_from_tag(obj, 'xmin'))
190 |             y1 = float(get_data_from_tag(obj, 'ymin'))
191 |             x2 = float(get_data_from_tag(obj, 'xmax'))
192 |             y2 = float(get_data_from_tag(obj, 'ymax'))
193 |             cls = self._wnid_to_ind[
194 |                     str(get_data_from_tag(obj, "name")).lower().strip()]
195 |             boxes[ix, :] = [x1, y1, x2, y2]
196 |             gt_classes[ix] = cls
197 |             overlaps[ix, cls] = 1.0
198 | 
199 |         overlaps = scipy.sparse.csr_matrix(overlaps)
200 | 
201 |         return {'boxes' : boxes,
202 |                 'gt_classes': gt_classes,
203 |                 'gt_overlaps' : overlaps,
204 |                 'flipped' : False}
205 | 
if __name__ == '__main__':
    # Usage: python imagenet.py [devkit_path] [data_path]
    # The class is instantiated directly: `datasets.imagenet` is the module
    # object and is not callable, and the constructor takes three arguments.
    devkit_path = sys.argv[1] if len(sys.argv) > 1 else ''
    data_path = sys.argv[2] if len(sys.argv) > 2 else ''
    d = imagenet('val', devkit_path, data_path)
    res = d.roidb
    from IPython import embed; embed()
210 | 


--------------------------------------------------------------------------------
/lib/model/rpn/anchor_target_layer_fpn.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Faster R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Sean Bell
  6 | # --------------------------------------------------------
  7 | # --------------------------------------------------------
  8 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
  9 | # --------------------------------------------------------
 10 | 
 11 | import torch
 12 | import torch.nn as nn
 13 | import numpy as np
 14 | import numpy.random as npr
 15 | 
 16 | from model.utils.config import cfg
 17 | from .generate_anchors import generate_anchors, generate_anchors_all_pyramids
 18 | from .bbox_transform import clip_boxes, bbox_overlaps_batch, bbox_transform_batch
 19 | try:
 20 |     long        # Python 2
 21 | except NameError:
 22 |     long = int  # Python 3
 23 | 
 24 | import pdb
 25 | 
 26 | DEBUG = False
 27 | 
 28 | class _AnchorTargetLayer_FPN(nn.Module):
 29 |     """
 30 |         Assign anchors to ground-truth targets. Produces anchor classification
 31 |         labels and bounding-box regression targets.
 32 |     """
 33 |     def __init__(self, feat_stride, scales, ratios):
 34 |         super(_AnchorTargetLayer_FPN, self).__init__()
 35 |         self._anchor_ratios = ratios
 36 |         self._feat_stride = feat_stride
 37 |         self._fpn_scales = np.array(cfg.FPN_ANCHOR_SCALES)
 38 |         self._fpn_feature_strides = np.array(cfg.FPN_FEAT_STRIDES)
 39 |         self._fpn_anchor_stride = cfg.FPN_ANCHOR_STRIDE
 40 | 
 41 |         # allow boxes to sit over the edge by a small amount
 42 |         self._allowed_border = 0  # default is 0
 43 | 
 44 |     def forward(self, input):
 45 |         # Algorithm:
 46 |         #
 47 |         # for each (H, W) location i
 48 |         #   generate 9 anchor boxes centered on cell i
 49 |         #   apply predicted bbox deltas at cell i to each of the 9 anchors
 50 |         # filter out-of-image anchors
 51 | 
 52 |         scores = input[0]
 53 |         gt_boxes = input[1]
 54 |         im_info = input[2]
 55 |         num_boxes = input[3]
 56 |         feat_shapes = input[4]
 57 | 
 58 |         # NOTE: need to change
 59 |         # height, width = scores.size(2), scores.size(3)
 60 |         height, width = 0, 0
 61 | 
 62 |         batch_size = gt_boxes.size(0)
 63 | 
 64 |         anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios, 
 65 |                 feat_shapes, self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)    
 66 |         total_anchors = anchors.size(0)
 67 |         
 68 |         keep = ((anchors[:, 0] >= -self._allowed_border) &
 69 |                 (anchors[:, 1] >= -self._allowed_border) &
 70 |                 (anchors[:, 2] < long(im_info[0][1]) + self._allowed_border) &
 71 |                 (anchors[:, 3] < long(im_info[0][0]) + self._allowed_border))
 72 | 
 73 |         inds_inside = torch.nonzero(keep).view(-1)
 74 | 
 75 |         # keep only inside anchors
 76 |         anchors = anchors[inds_inside, :]
 77 | 
 78 |         # label: 1 is positive, 0 is negative, -1 is dont care
 79 |         labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
 80 |         bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
 81 |         bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
 82 | 
 83 |         overlaps = bbox_overlaps_batch(anchors, gt_boxes)
 84 | 
 85 |         max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
 86 |         gt_max_overlaps, _ = torch.max(overlaps, 1)
 87 | 
 88 |         if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
 89 |             labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
 90 | 
 91 |         gt_max_overlaps[gt_max_overlaps==0] = 1e-5
 92 |         keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size,1,-1).expand_as(overlaps)), 2)
 93 | 
 94 |         if torch.sum(keep) > 0:
 95 |             labels[keep>0] = 1
 96 | 
 97 |         # fg label: above threshold IOU
 98 |         labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
 99 | 
100 |         if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
101 |             labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
102 | 
103 |         num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
104 | 
105 |         sum_fg = torch.sum((labels == 1).int(), 1)
106 |         sum_bg = torch.sum((labels == 0).int(), 1)
107 | 
108 |         for i in range(batch_size):
109 |             # subsample positive labels if we have too many
110 |             if sum_fg[i] > num_fg:
111 |                 fg_inds = torch.nonzero(labels[i] == 1).view(-1)
112 |                 # torch.randperm seems has a bug on multi-gpu setting that cause the segfault. 
113 |                 # See https://github.com/pytorch/pytorch/issues/1868 for more details.
114 |                 # use numpy instead.                
115 |                 #rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
116 |                 rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
117 |                 disable_inds = fg_inds[rand_num[:fg_inds.size(0)-num_fg]]
118 |                 labels[i][disable_inds] = -1
119 | 
120 |             num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]
121 | 
122 |             # subsample negative labels if we have too many
123 |             if sum_bg[i] > num_bg:
124 |                 bg_inds = torch.nonzero(labels[i] == 0).view(-1)
125 |                 #rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()
126 | 
127 |                 rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
128 |                 disable_inds = bg_inds[rand_num[:bg_inds.size(0)-num_bg]]
129 |                 labels[i][disable_inds] = -1
130 | 
131 |         offset = torch.arange(0, batch_size)*gt_boxes.size(1)
132 | 
133 |         argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
134 |         bbox_targets = _compute_targets_batch(anchors, gt_boxes.view(-1,5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))
135 | 
136 |         # use a single value instead of 4 values for easy index.
137 |         bbox_inside_weights[labels==1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]
138 | 
139 |         if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
140 |             num_examples = torch.sum(labels[i] >= 0)
141 |             positive_weights = 1.0 / num_examples
142 |             negative_weights = 1.0 / num_examples
143 |         else:
144 |             assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
145 |                     (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
146 | 
147 |         bbox_outside_weights[labels == 1] = positive_weights
148 |         bbox_outside_weights[labels == 0] = negative_weights
149 | 
150 |         labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
151 |         bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
152 |         bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
153 |         bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)
154 | 
155 |         outputs = []
156 | 
157 |         # labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
158 |         # labels = labels.view(batch_size, 1, A * height, width)
159 |         outputs.append(labels)
160 |         # bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
161 |         outputs.append(bbox_targets)
162 | 
163 |         # anchors_count = bbox_inside_weights.size(1)
164 |         # bbox_inside_weights = bbox_inside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
165 |         # bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
166 |                             # .permute(0,3,1,2).contiguous()
167 | 
168 |         outputs.append(bbox_inside_weights)
169 | 
170 |         # bbox_outside_weights = bbox_outside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
171 |         # bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
172 |                             # .permute(0,3,1,2).contiguous()
173 |         outputs.append(bbox_outside_weights)
174 | 
175 |         return outputs
176 | 
177 |     def backward(self, top, propagate_down, bottom):
178 |         """This layer does not propagate gradients."""
179 |         pass
180 | 
181 |     def reshape(self, bottom, top):
182 |         """Reshaping happens during the call to forward."""
183 |         pass
184 | 
185 | def _unmap(data, count, inds, batch_size, fill=0):
186 |     """ Unmap a subset of item (data) back to the original set of items (of
187 |     size count) """
188 | 
189 |     if data.dim() == 2:
190 |         ret = torch.Tensor(batch_size, count).fill_(fill).type_as(data)
191 |         ret[:, inds] = data
192 |     else:
193 |         ret = torch.Tensor(batch_size, count, data.size(2)).fill_(fill).type_as(data)
194 |         ret[:, inds,:] = data
195 |     return ret
196 | 
197 | 
def _compute_targets_batch(ex_rois, gt_rois):
    """Return the regression targets from `ex_rois` to `gt_rois`.

    Only the first four values of the last dimension of `gt_rois` are
    forwarded to bbox_transform_batch.
    """
    gt_coords = gt_rois[:, :, :4]
    return bbox_transform_batch(ex_rois, gt_coords)
202 | 


--------------------------------------------------------------------------------
/lib/pycocotools/maskApi.c:
--------------------------------------------------------------------------------
  1 | /**************************************************************************
  2 | * Microsoft COCO Toolbox.      version 2.0
  3 | * Data, paper, and tutorials available at:  http://mscoco.org/
  4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  5 | * Licensed under the Simplified BSD License [see coco/license.txt]
  6 | **************************************************************************/
  7 | #include "maskApi.h"
  8 | #include <math.h>
  9 | #include <stdlib.h>
 10 | 
 11 | uint umin( uint a, uint b ) { return (a<b) ? a : b; }
 12 | uint umax( uint a, uint b ) { return (a>b) ? a : b; }
 13 | 
 14 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
 15 |   R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
 16 |   siz j; if(cnts) for(j=0; j<m; j++) R->cnts[j]=cnts[j];
 17 | }
 18 | 
 19 | void rleFree( RLE *R ) {
 20 |   free(R->cnts); R->cnts=0;
 21 | }
 22 | 
 23 | void rlesInit( RLE **R, siz n ) {
 24 |   siz i; *R = (RLE*) malloc(sizeof(RLE)*n);
 25 |   for(i=0; i<n; i++) rleInit((*R)+i,0,0,0,0);
 26 | }
 27 | 
 28 | void rlesFree( RLE **R, siz n ) {
 29 |   siz i; for(i=0; i<n; i++) rleFree((*R)+i); free(*R); *R=0;
 30 | }
 31 | 
 32 | void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
 33 |   siz i, j, k, a=w*h; uint c, *cnts; byte p;
 34 |   cnts = malloc(sizeof(uint)*(a+1));
 35 |   for(i=0; i<n; i++) {
 36 |     const byte *T=M+a*i; k=0; p=0; c=0;
 37 |     for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
 38 |     cnts[k++]=c; rleInit(R+i,h,w,k,cnts);
 39 |   }
 40 |   free(cnts);
 41 | }
 42 | 
 43 | void rleDecode( const RLE *R, byte *M, siz n ) {
 44 |   siz i, j, k; for( i=0; i<n; i++ ) {
 45 |     byte v=0; for( j=0; j<R[i].m; j++ ) {
 46 |       for( k=0; k<R[i].cnts[j]; k++ ) *(M++)=v; v=!v; }}
 47 | }
 48 | 
 49 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) {
 50 |   uint *cnts, c, ca, cb, cc, ct; int v, va, vb, vp;
 51 |   siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
 52 |   if(n==0) { rleInit(M,0,0,0,0); return; }
 53 |   if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
 54 |   cnts = malloc(sizeof(uint)*(h*w+1));
 55 |   for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
 56 |   for( i=1; i<n; i++ ) {
 57 |     B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
 58 |     rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
 59 |     v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
 60 |     while( ct>0 ) {
 61 |       c=umin(ca,cb); cc+=c; ct=0;
 62 |       ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
 63 |       cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
 64 |       vp=v; if(intersect) v=va&&vb; else v=va||vb;
 65 |       if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
 66 |     }
 67 |     rleFree(&A);
 68 |   }
 69 |   rleInit(M,h,w,m,cnts); free(cnts);
 70 | }
 71 | 
 72 | void rleArea( const RLE *R, siz n, uint *a ) {
 73 |   siz i, j; for( i=0; i<n; i++ ) {
 74 |     a[i]=0; for( j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j]; }
 75 | }
 76 | 
/* Compute the intersection-over-union between m detection masks (dt) and n
   ground-truth masks (gt).  The result for pair (d,g) is written to
   o[g*m+d].  iscrowd may be NULL; when iscrowd[g] is set, the union is
   replaced by the area of the detection.  Pairs whose masks differ in size
   get -1. */
void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
  siz g, d; BB db, gb; int crowd;
  /* Bounding-box IoU first: only pairs whose boxes overlap (o>0) are refined
     with the exact mask computation below. */
  db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
  gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
  bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
  for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
    crowd=iscrowd!=NULL && iscrowd[g];
    if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
    siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb;
    /* ca/cb: pixels left in the current run of dt/gt; va/vb: value of that
       run; i/u: accumulated intersection/union; ct: pixels still to visit */
    ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
    cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
    while( ct>0 ) {
      /* advance by the shorter of the two current runs */
      c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
      ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
      cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
    }
    /* u=1 avoids a 0/0 when the masks do not intersect */
    if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
    o[g*m+d] = (double)i/(double)u;
  }
}
 97 | 
 98 | void rleNms( RLE *dt, siz n, uint *keep, double thr ) {
 99 |   siz i, j; double u;
100 |   for( i=0; i<n; i++ ) keep[i]=1;
101 |   for( i=0; i<n; i++ ) if(keep[i]) {
102 |     for( j=i+1; j<n; j++ ) if(keep[j]) {
103 |       rleIou(dt+i,dt+j,1,1,0,&u);
104 |       if(u>thr) keep[j]=0;
105 |     }
106 |   }
107 | }
108 | 
109 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
110 |   double h, w, i, u, ga, da; siz g, d; int crowd;
111 |   for( g=0; g<n; g++ ) {
112 |     BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
113 |     for( d=0; d<m; d++ ) {
114 |       BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
115 |       w=fmin(D[2]+D[0],G[2]+G[0])-fmax(D[0],G[0]); if(w<=0) continue;
116 |       h=fmin(D[3]+D[1],G[3]+G[1])-fmax(D[1],G[1]); if(h<=0) continue;
117 |       i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
118 |     }
119 |   }
120 | }
121 | 
122 | void bbNms( BB dt, siz n, uint *keep, double thr ) {
123 |   siz i, j; double u;
124 |   for( i=0; i<n; i++ ) keep[i]=1;
125 |   for( i=0; i<n; i++ ) if(keep[i]) {
126 |     for( j=i+1; j<n; j++ ) if(keep[j]) {
127 |       bbIou(dt+i*4,dt+j*4,1,1,0,&u);
128 |       if(u>thr) keep[j]=0;
129 |     }
130 |   }
131 | }
132 | 
133 | void rleToBbox( const RLE *R, BB bb, siz n ) {
134 |   siz i; for( i=0; i<n; i++ ) {
135 |     uint h, w, x, y, xs, ys, xe, ye, cc, t; siz j, m;
136 |     h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
137 |     m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
138 |     if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
139 |     for( j=0; j<m; j++ ) {
140 |       cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
141 |       xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
142 |     }
143 |     bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
144 |     bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
145 |   }
146 | }
147 | 
148 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
149 |   siz i; for( i=0; i<n; i++ ) {
150 |     double xs=bb[4*i+0], xe=xs+bb[4*i+2];
151 |     double ys=bb[4*i+1], ye=ys+bb[4*i+3];
152 |     double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
153 |     rleFrPoly( R+i, xy, 4, h, w );
154 |   }
155 | }
156 | 
157 | int uintCompare(const void *a, const void *b) {
158 |   uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
159 | }
160 | 
/* Rasterise a closed polygon into an RLE mask of size h x w.  xy holds k
   vertices as interleaved (x,y) pairs; the polygon is closed by linking the
   last vertex back to the first. */
void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
  /* upsample and get discrete points densely along entire boundary */
  siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
  x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
  /* scaled, rounded vertex coordinates; entry k repeats entry 0 */
  for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
  for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
  /* count the boundary points needed for all edges */
  for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
  u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
  for( j=0; j<k; j++ ) {
    int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t, d;
    int flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
    /* step along the longer axis; flip so the stepping coordinate grows,
       while t below keeps the points in the original edge direction */
    flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
    if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
    s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
    if(dx>=dy) for( d=0; d<=dx; d++ ) {
      t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
    } else for( d=0; d<=dy; d++ ) {
      t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
    }
  }
  /* get points along y-boundary and downsample */
  free(x); free(y); k=m; m=0; double xd, yd;
  x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
  /* only points where the upsampled x coordinate changes are considered */
  for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
    xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
    /* keep crossings that land exactly on a pixel column inside the image */
    if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
    yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
    /* clamp y to [0,h] before rounding up */
    if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
    x[m]=(int) xd; y[m]=(int) yd; m++;
  }
  /* compute rle encoding given y-boundary points */
  k=m; a=malloc(sizeof(uint)*(k+1));
  /* column-major linear index of every crossing, plus the end-of-image
     sentinel h*w */
  for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
  a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
  qsort(a,k,sizeof(uint),uintCompare); uint p=0;
  /* turn sorted positions into run lengths (differences) */
  for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
  /* a zero-length run (after the first) is dropped and the run that follows
     it is merged into the previous run */
  b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
  while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
    j++; if(j<k) b[m-1]+=a[j++]; }
  rleInit(R,h,w,m,b); free(a); free(b);
}
202 | 
203 | char* rleToString( const RLE *R ) {
204 |   /* Similar to LEB128 but using 6 bits/char and ascii chars 48-111. */
205 |   siz i, m=R->m, p=0; long x; int more;
206 |   char *s=malloc(sizeof(char)*m*6);
207 |   for( i=0; i<m; i++ ) {
208 |     x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
209 |     while( more ) {
210 |       char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
211 |       if(more) c |= 0x20; c+=48; s[p++]=c;
212 |     }
213 |   }
214 |   s[p]=0; return s;
215 | }
216 | 
217 | void rleFrString( RLE *R, char *s, siz h, siz w ) {
218 |   siz m=0, p=0, k; long x; int more; uint *cnts;
219 |   while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
220 |   while( s[p] ) {
221 |     x=0; k=0; more=1;
222 |     while( more ) {
223 |       char c=s[p]-48; x |= (c & 0x1f) << 5*k;
224 |       more = c & 0x20; p++; k++;
225 |       if(!more && (c & 0x10)) x |= -1 << 5*k;
226 |     }
227 |     if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
228 |   }
229 |   rleInit(R,h,w,m,cnts); free(cnts);
230 | }
231 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roibatchLoader.py:
--------------------------------------------------------------------------------
  1 | 
  2 | """The data layer used during training to train a Fast R-CNN network.
  3 | """
  4 | 
  5 | from __future__ import absolute_import
  6 | from __future__ import division
  7 | from __future__ import print_function
  8 | 
  9 | import torch.utils.data as data
 10 | from PIL import Image
 11 | import torch
 12 | 
 13 | from model.utils.config import cfg
 14 | from roi_data_layer.minibatch import get_minibatch, get_minibatch
 15 | from model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
 16 | 
 17 | import numpy as np
 18 | import random
 19 | import time
 20 | import pdb
 21 | 
 22 | class roibatchLoader(data.Dataset):
 23 |   def __init__(self, roidb, ratio_list, ratio_index, batch_size, num_classes, training=True, normalize=None):
 24 |     self._roidb = roidb
 25 |     self._num_classes = num_classes
 26 |     # we make the height of image consistent to trim_height, trim_width
 27 |     self.trim_height = cfg.TRAIN.TRIM_HEIGHT
 28 |     self.trim_width = cfg.TRAIN.TRIM_WIDTH
 29 |     self.max_num_box = cfg.MAX_NUM_GT_BOXES
 30 |     self.training = training
 31 |     self.normalize = normalize
 32 |     self.ratio_list = ratio_list
 33 |     self.ratio_index = ratio_index
 34 |     self.batch_size = batch_size
 35 |     self.data_size = len(self.ratio_list)
 36 | 
 37 |     # given the ratio_list, we want to make the ratio same for each batch.
 38 |     self.ratio_list_batch = torch.Tensor(self.data_size).zero_()
 39 |     num_batch = int(np.ceil(len(ratio_index) / batch_size))
 40 |     for i in range(num_batch):
 41 |         left_idx = i*batch_size
 42 |         right_idx = min((i+1)*batch_size-1, self.data_size-1)
 43 | 
 44 |         if ratio_list[right_idx] < 1:
 45 |             # for ratio < 1, we preserve the leftmost in each batch.
 46 |             target_ratio = ratio_list[left_idx]
 47 |         elif ratio_list[left_idx] > 1:
 48 |             # for ratio > 1, we preserve the rightmost in each batch.
 49 |             target_ratio = ratio_list[right_idx]
 50 |         else:
 51 |             # for ratio cross 1, we make it to be 1.
 52 |             target_ratio = 1
 53 | 
 54 |         self.ratio_list_batch[left_idx:(right_idx+1)] = target_ratio
 55 | 
 56 | 
 57 |   def __getitem__(self, index):
 58 |     if self.training:
 59 |         index_ratio = int(self.ratio_index[index])
 60 |     else:
 61 |         index_ratio = index
 62 | 
 63 |     # get the anchor index for current sample index
 64 |     # here we set the anchor index to the last one
 65 |     # sample in this group
 66 |     minibatch_db = [self._roidb[index_ratio]]
 67 |     blobs = get_minibatch(minibatch_db, self._num_classes)
 68 |     data = torch.from_numpy(blobs['data'])
 69 |     im_info = torch.from_numpy(blobs['im_info'])
 70 |     # we need to random shuffle the bounding box.
 71 |     data_height, data_width = data.size(1), data.size(2)
 72 |     if self.training:
 73 |         np.random.shuffle(blobs['gt_boxes'])
 74 |         gt_boxes = torch.from_numpy(blobs['gt_boxes'])
 75 |         # if self.batch_size == 1:
 76 |         #     data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
 77 |         #     im_info = im_info.view(3)
 78 |         #     num_boxes = gt_boxes.size(0)
 79 |         #
 80 |         #     return data, im_info, gt_boxes, num_boxes, blobs['img_id']
 81 | 
 82 |         ########################################################
 83 |         # padding the input image to fixed size for each group #
 84 |         ########################################################
 85 | 
 86 |         # NOTE1: need to cope with the case where a group cover both conditions. (done)
 87 |         # NOTE2: need to consider the situation for the tail samples. (no worry)
 88 |         # NOTE3: need to implement a parallel data loader. (no worry)
 89 |         # get the index range
 90 | 
 91 |         # if the image need to crop, crop to the target size.
 92 |         ratio = self.ratio_list_batch[index]
 93 | 
 94 |         if self._roidb[index_ratio]['need_crop']:
 95 |             if ratio < 1:
 96 |                 # this means that data_width << data_height, we need to crop the
 97 |                 # data_height
 98 |                 min_y = int(torch.min(gt_boxes[:,1]))
 99 |                 max_y = int(torch.max(gt_boxes[:,3]))
100 |                 trim_size = int(np.floor(data_width / ratio))
101 |                 box_region = max_y - min_y + 1
102 |                 if min_y == 0:
103 |                     y_s = 0
104 |                 else:
105 |                     if (box_region-trim_size) < 0:
106 |                         y_s_min = max(max_y-trim_size, 0)
107 |                         y_s_max = min(min_y, data_height-trim_size)
108 |                         if y_s_min == y_s_max:
109 |                             y_s = y_s_min
110 |                         else:
111 |                             y_s = np.random.choice(range(y_s_min, y_s_max))
112 |                     else:
113 |                         y_s_add = int((box_region-trim_size)/2)
114 |                         if y_s_add == 0:
115 |                             y_s = min_y
116 |                         else:
117 |                             y_s = np.random.choice(range(min_y, min_y+y_s_add))
118 |                 # crop the image
119 |                 data = data[:, y_s:(y_s + trim_size), :, :]
120 | 
121 |                 # shift y coordiante of gt_boxes
122 |                 gt_boxes[:, 1] = gt_boxes[:, 1] - float(y_s)
123 |                 gt_boxes[:, 3] = gt_boxes[:, 3] - float(y_s)
124 | 
125 |                 # update gt bounding box according the trip
126 |                 gt_boxes[:, 1].clamp_(0, trim_size - 1)
127 |                 gt_boxes[:, 3].clamp_(0, trim_size - 1)
128 | 
129 |             else:
130 |                 # this means that data_width >> data_height, we need to crop the
131 |                 # data_width
132 |                 min_x = int(torch.min(gt_boxes[:,0]))
133 |                 max_x = int(torch.max(gt_boxes[:,2]))
134 |                 trim_size = int(np.ceil(data_height * ratio))
135 |                 box_region = max_x - min_x + 1
136 |                 if min_x == 0:
137 |                     x_s = 0
138 |                 else:
139 |                     if (box_region-trim_size) < 0:
140 |                         x_s_min = max(max_x-trim_size, 0)
141 |                         x_s_max = min(min_x, data_width-trim_size)
142 |                         if x_s_min == x_s_max:
143 |                             x_s = x_s_min
144 |                         else:
145 |                             x_s = np.random.choice(range(x_s_min, x_s_max))
146 |                     else:
147 |                         x_s_add = int((box_region-trim_size)/2)
148 |                         if x_s_add == 0:
149 |                             x_s = min_x
150 |                         else:
151 |                             x_s = np.random.choice(range(min_x, min_x+x_s_add))
152 |                 # crop the image
153 |                 data = data[:, :, x_s:(x_s + trim_size), :]
154 | 
155 |                 # shift x coordiante of gt_boxes
156 |                 gt_boxes[:, 0] = gt_boxes[:, 0] - float(x_s)
157 |                 gt_boxes[:, 2] = gt_boxes[:, 2] - float(x_s)
158 |                 # update gt bounding box according the trip
159 |                 gt_boxes[:, 0].clamp_(0, trim_size - 1)
160 |                 gt_boxes[:, 2].clamp_(0, trim_size - 1)
161 | 
162 |         # based on the ratio, padding the image.
163 |         if ratio < 1:
164 |             # this means that data_width < data_height
165 |             trim_size = int(np.floor(data_width / ratio))
166 | 
167 |             padding_data = torch.FloatTensor(int(np.ceil(data_width / ratio)), \
168 |                                              data_width, 3).zero_()
169 | 
170 |             padding_data[:data_height, :, :] = data[0]
171 |             # update im_info
172 |             im_info[0, 0] = padding_data.size(0)
173 |             # print("height %d %d \n" %(index, anchor_idx))
174 |         elif ratio > 1:
175 |             # this means that data_width > data_height
176 |             # if the image need to crop.
177 |             padding_data = torch.FloatTensor(data_height, \
178 |                                              int(np.ceil(data_height * ratio)), 3).zero_()
179 |             padding_data[:, :data_width, :] = data[0]
180 |             im_info[0, 1] = padding_data.size(1)
181 |         else:
182 |             trim_size = min(data_height, data_width)
183 |             padding_data = torch.FloatTensor(trim_size, trim_size, 3).zero_()
184 |             padding_data = data[0][:trim_size, :trim_size, :]
185 |             gt_boxes.clamp_(0, trim_size)
186 |             im_info[0, 0] = trim_size
187 |             im_info[0, 1] = trim_size
188 | 
189 | 
190 |         # check the bounding box:
191 |         not_keep = (gt_boxes[:,0] == gt_boxes[:,2]) | (gt_boxes[:,1] == gt_boxes[:,3])
192 |         keep = torch.nonzero(not_keep == 0).view(-1)
193 | 
194 |         gt_boxes_padding = torch.FloatTensor(self.max_num_box, gt_boxes.size(1)).zero_()
195 |         if keep.numel() != 0:
196 |             gt_boxes = gt_boxes[keep]
197 |             num_boxes = min(gt_boxes.size(0), self.max_num_box)
198 |             gt_boxes_padding[:num_boxes,:] = gt_boxes[:num_boxes]
199 |         else:
200 |             num_boxes = 0
201 | 
202 |             # permute trim_data to adapt to downstream processing
203 |         padding_data = padding_data.permute(2, 0, 1).contiguous()
204 |         im_info = im_info.view(3)
205 | 
206 |         return padding_data, im_info, gt_boxes_padding, num_boxes, blobs['img_id']
207 |     else:
208 |         data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
209 |         im_info = im_info.view(3)
210 | 
211 |         gt_boxes = torch.FloatTensor([1,1,1,1,1])
212 |         num_boxes = 0
213 | 
214 |         return data, im_info, gt_boxes, num_boxes
215 | 
216 |   def __len__(self):
217 |     return len(self._roidb)
218 | 


--------------------------------------------------------------------------------
/lib/datasets/imdb.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick and Xinlei Chen
  6 | # --------------------------------------------------------
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | from __future__ import print_function
 10 | 
 11 | import os
 12 | import os.path as osp
 13 | import PIL
 14 | from model.utils.cython_bbox import bbox_overlaps
 15 | import numpy as np
 16 | import scipy.sparse
 17 | from model.utils.config import cfg
 18 | import pdb
 19 | 
 20 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..')
 21 | 
 22 | class imdb(object):
 23 |   """Image database."""
 24 | 
 25 |   def __init__(self, name, classes=None):
 26 |     self._name = name
 27 |     self._num_classes = 0
 28 |     if not classes:
 29 |       self._classes = []
 30 |     else:
 31 |       self._classes = classes
 32 |     self._image_index = []
 33 |     self._obj_proposer = 'gt'
 34 |     self._roidb = None
 35 |     self._roidb_handler = self.default_roidb
 36 |     # Use this dict for storing dataset specific config options
 37 |     self.config = {}
 38 | 
 39 |   @property
 40 |   def name(self):
 41 |     return self._name
 42 | 
 43 |   @property
 44 |   def num_classes(self):
 45 |     return len(self._classes)
 46 | 
 47 |   @property
 48 |   def classes(self):
 49 |     return self._classes
 50 | 
 51 |   @property
 52 |   def image_index(self):
 53 |     return self._image_index
 54 | 
 55 |   @property
 56 |   def roidb_handler(self):
 57 |     return self._roidb_handler
 58 | 
 59 |   @roidb_handler.setter
 60 |   def roidb_handler(self, val):
 61 |     self._roidb_handler = val
 62 | 
 63 |   def set_proposal_method(self, method):
 64 |     method = eval('self.' + method + '_roidb')
 65 |     self.roidb_handler = method
 66 | 
 67 |   @property
 68 |   def roidb(self):
 69 |     # A roidb is a list of dictionaries, each with the following keys:
 70 |     #   boxes
 71 |     #   gt_overlaps
 72 |     #   gt_classes
 73 |     #   flipped
 74 |     if self._roidb is not None:
 75 |       return self._roidb
 76 |     self._roidb = self.roidb_handler()
 77 |     return self._roidb
 78 | 
 79 |   @property
 80 |   def cache_path(self):
 81 |     cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache'))
 82 |     if not os.path.exists(cache_path):
 83 |       os.makedirs(cache_path)
 84 |     return cache_path
 85 | 
 86 |   @property
 87 |   def num_images(self):
 88 |     return len(self.image_index)
 89 | 
 90 |   def image_path_at(self, i):
 91 |     raise NotImplementedError
 92 | 
 93 |   def image_id_at(self, i):
 94 |     raise NotImplementedError
 95 | 
 96 |   def default_roidb(self):
 97 |     raise NotImplementedError
 98 | 
 99 |   def evaluate_detections(self, all_boxes, output_dir=None):
100 |     """
101 |     all_boxes is a list of length number-of-classes.
102 |     Each list element is a list of length number-of-images.
103 |     Each of those list elements is either an empty list []
104 |     or a numpy array of detection.
105 | 
106 |     all_boxes[class][image] = [] or np.array of shape #dets x 5
107 |     """
108 |     raise NotImplementedError
109 | 
110 |   def _get_widths(self):
111 |     return [PIL.Image.open(self.image_path_at(i)).size[0]
112 |             for i in range(self.num_images)]
113 | 
114 |   def append_flipped_images(self):
115 |     num_images = self.num_images
116 |     widths = self._get_widths()
117 |     for i in range(num_images):
118 |       boxes = self.roidb[i]['boxes'].copy()
119 |       oldx1 = boxes[:, 0].copy()
120 |       oldx2 = boxes[:, 2].copy()
121 |       boxes[:, 0] = widths[i] - oldx2 - 1
122 |       boxes[:, 2] = widths[i] - oldx1 - 1
123 |       assert (boxes[:, 2] >= boxes[:, 0]).all()
124 |       entry = {'boxes': boxes,
125 |                'gt_overlaps': self.roidb[i]['gt_overlaps'],
126 |                'gt_classes': self.roidb[i]['gt_classes'],
127 |                'flipped': True}
128 |       self.roidb.append(entry)
129 |     self._image_index = self._image_index * 2
130 | 
131 |   def evaluate_recall(self, candidate_boxes=None, thresholds=None,
132 |                       area='all', limit=None):
133 |     """Evaluate detection proposal recall metrics.
134 | 
135 |     Returns:
136 |         results: dictionary of results with keys
137 |             'ar': average recall
138 |             'recalls': vector recalls at each IoU overlap threshold
139 |             'thresholds': vector of IoU overlap thresholds
140 |             'gt_overlaps': vector of all ground-truth overlaps
141 |     """
142 |     # Record max overlap value for each gt box
143 |     # Return vector of overlap values
144 |     areas = {'all': 0, 'small': 1, 'medium': 2, 'large': 3,
145 |              '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
146 |     area_ranges = [[0 ** 2, 1e5 ** 2],  # all
147 |                    [0 ** 2, 32 ** 2],  # small
148 |                    [32 ** 2, 96 ** 2],  # medium
149 |                    [96 ** 2, 1e5 ** 2],  # large
150 |                    [96 ** 2, 128 ** 2],  # 96-128
151 |                    [128 ** 2, 256 ** 2],  # 128-256
152 |                    [256 ** 2, 512 ** 2],  # 256-512
153 |                    [512 ** 2, 1e5 ** 2],  # 512-inf
154 |                    ]
155 |     assert area in areas, 'unknown area range: {}'.format(area)
156 |     area_range = area_ranges[areas[area]]
157 |     gt_overlaps = np.zeros(0)
158 |     num_pos = 0
159 |     for i in range(self.num_images):
160 |       # Checking for max_overlaps == 1 avoids including crowd annotations
161 |       # (...pretty hacking :/)
162 |       max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
163 |       gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
164 |                          (max_gt_overlaps == 1))[0]
165 |       gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
166 |       gt_areas = self.roidb[i]['seg_areas'][gt_inds]
167 |       valid_gt_inds = np.where((gt_areas >= area_range[0]) &
168 |                                (gt_areas <= area_range[1]))[0]
169 |       gt_boxes = gt_boxes[valid_gt_inds, :]
170 |       num_pos += len(valid_gt_inds)
171 | 
172 |       if candidate_boxes is None:
173 |         # If candidate_boxes is not supplied, the default is to use the
174 |         # non-ground-truth boxes from this roidb
175 |         non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
176 |         boxes = self.roidb[i]['boxes'][non_gt_inds, :]
177 |       else:
178 |         boxes = candidate_boxes[i]
179 |       if boxes.shape[0] == 0:
180 |         continue
181 |       if limit is not None and boxes.shape[0] > limit:
182 |         boxes = boxes[:limit, :]
183 | 
184 |       overlaps = bbox_overlaps(boxes.astype(np.float),
185 |                                gt_boxes.astype(np.float))
186 | 
187 |       _gt_overlaps = np.zeros((gt_boxes.shape[0]))
188 |       for j in range(gt_boxes.shape[0]):
189 |         # find which proposal box maximally covers each gt box
190 |         argmax_overlaps = overlaps.argmax(axis=0)
191 |         # and get the iou amount of coverage for each gt box
192 |         max_overlaps = overlaps.max(axis=0)
193 |         # find which gt box is 'best' covered (i.e. 'best' = most iou)
194 |         gt_ind = max_overlaps.argmax()
195 |         gt_ovr = max_overlaps.max()
196 |         assert (gt_ovr >= 0)
197 |         # find the proposal box that covers the best covered gt box
198 |         box_ind = argmax_overlaps[gt_ind]
199 |         # record the iou coverage of this gt box
200 |         _gt_overlaps[j] = overlaps[box_ind, gt_ind]
201 |         assert (_gt_overlaps[j] == gt_ovr)
202 |         # mark the proposal box and the gt box as used
203 |         overlaps[box_ind, :] = -1
204 |         overlaps[:, gt_ind] = -1
205 |       # append recorded iou coverage level
206 |       gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))
207 | 
208 |     gt_overlaps = np.sort(gt_overlaps)
209 |     if thresholds is None:
210 |       step = 0.05
211 |       thresholds = np.arange(0.5, 0.95 + 1e-5, step)
212 |     recalls = np.zeros_like(thresholds)
213 |     # compute recall for each iou threshold
214 |     for i, t in enumerate(thresholds):
215 |       recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
216 |     # ar = 2 * np.trapz(recalls, thresholds)
217 |     ar = recalls.mean()
218 |     return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
219 |             'gt_overlaps': gt_overlaps}
220 | 
221 |   def create_roidb_from_box_list(self, box_list, gt_roidb):
222 |     assert len(box_list) == self.num_images, \
223 |       'Number of boxes must match number of ground-truth images'
224 |     roidb = []
225 |     for i in range(self.num_images):
226 |       boxes = box_list[i]
227 |       num_boxes = boxes.shape[0]
228 |       overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
229 | 
230 |       if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
231 |         gt_boxes = gt_roidb[i]['boxes']
232 |         gt_classes = gt_roidb[i]['gt_classes']
233 |         gt_overlaps = bbox_overlaps(boxes.astype(np.float),
234 |                                     gt_boxes.astype(np.float))
235 |         argmaxes = gt_overlaps.argmax(axis=1)
236 |         maxes = gt_overlaps.max(axis=1)
237 |         I = np.where(maxes > 0)[0]
238 |         overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
239 | 
240 |       overlaps = scipy.sparse.csr_matrix(overlaps)
241 |       roidb.append({
242 |         'boxes': boxes,
243 |         'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
244 |         'gt_overlaps': overlaps,
245 |         'flipped': False,
246 |         'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
247 |       })
248 |     return roidb
249 | 
250 |   @staticmethod
251 |   def merge_roidbs(a, b):
252 |     assert len(a) == len(b)
253 |     for i in range(len(a)):
254 |       a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes']))
255 |       a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'],
256 |                                       b[i]['gt_classes']))
257 |       a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'],
258 |                                                  b[i]['gt_overlaps']])
259 |       a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'],
260 |                                      b[i]['seg_areas']))
261 |     return a
262 | 
263 |   def competition_mode(self, on):
264 |     """Turn competition mode on or off."""
265 |     pass
266 | 


--------------------------------------------------------------------------------
/lib/model/rpn/bbox_transform.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Ross Girshick
  6 | # --------------------------------------------------------
  7 | # --------------------------------------------------------
  8 | # Reorganized and modified by Jianwei Yang and Jiasen Lu
  9 | # --------------------------------------------------------
 10 | 
 11 | import torch
 12 | import numpy as np
 13 | import pdb
 14 | 
 15 | def bbox_transform(ex_rois, gt_rois):
 16 |     ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
 17 |     ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
 18 |     ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
 19 |     ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
 20 | 
 21 |     gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
 22 |     gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
 23 |     gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
 24 |     gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
 25 | 
 26 |     targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
 27 |     targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
 28 |     targets_dw = torch.log(gt_widths / ex_widths)
 29 |     targets_dh = torch.log(gt_heights / ex_heights)
 30 | 
 31 |     targets = torch.stack(
 32 |         (targets_dx, targets_dy, targets_dw, targets_dh),1)
 33 | 
 34 |     return targets
 35 | 
 36 | def bbox_transform_batch(ex_rois, gt_rois):
 37 | 
 38 |     if ex_rois.dim() == 2:
 39 |         ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
 40 |         ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
 41 |         ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
 42 |         ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
 43 | 
 44 |         gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
 45 |         gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
 46 |         gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
 47 |         gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights
 48 | 
 49 |         targets_dx = (gt_ctr_x - ex_ctr_x.view(1,-1).expand_as(gt_ctr_x)) / ex_widths
 50 |         targets_dy = (gt_ctr_y - ex_ctr_y.view(1,-1).expand_as(gt_ctr_y)) / ex_heights
 51 |         targets_dw = torch.log(gt_widths / ex_widths.view(1,-1).expand_as(gt_widths))
 52 |         targets_dh = torch.log(gt_heights / ex_heights.view(1,-1).expand_as(gt_heights))
 53 | 
 54 |     elif ex_rois.dim() == 3:
 55 |         ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0
 56 |         ex_heights = ex_rois[:,:, 3] - ex_rois[:,:, 1] + 1.0
 57 |         ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths
 58 |         ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights
 59 | 
 60 |         gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
 61 |         gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
 62 |         gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
 63 |         gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights
 64 | 
 65 |         targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
 66 |         targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
 67 |         targets_dw = torch.log(gt_widths / ex_widths)
 68 |         targets_dh = torch.log(gt_heights / ex_heights)
 69 |     else:
 70 |         raise ValueError('ex_roi input dimension is not correct.')
 71 | 
 72 |     targets = torch.stack(
 73 |         (targets_dx, targets_dy, targets_dw, targets_dh),2)
 74 | 
 75 |     return targets
 76 | 
 77 | def bbox_transform_inv(boxes, deltas, batch_size):
 78 |     widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
 79 |     heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
 80 |     ctr_x = boxes[:, :, 0] + 0.5 * widths
 81 |     ctr_y = boxes[:, :, 1] + 0.5 * heights
 82 | 
 83 |     dx = deltas[:, :, 0::4]
 84 |     dy = deltas[:, :, 1::4]
 85 |     dw = deltas[:, :, 2::4]
 86 |     dh = deltas[:, :, 3::4]
 87 | 
 88 |     pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
 89 |     pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2)
 90 |     pred_w = torch.exp(dw) * widths.unsqueeze(2)
 91 |     pred_h = torch.exp(dh) * heights.unsqueeze(2)
 92 | 
 93 |     pred_boxes = deltas.clone()
 94 |     # x1
 95 |     pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w
 96 |     # y1
 97 |     pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h
 98 |     # x2
 99 |     pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w
100 |     # y2
101 |     pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h
102 | 
103 |     return pred_boxes
104 | 
def clip_boxes_batch(boxes, im_shape, batch_size):
    """
    Clip boxes to image boundaries.

    Args:
        boxes: tensor indexed as (batch, num_rois, 4) holding
            (x1, y1, x2, y2); it is modified in place.
        im_shape: tensor indexed as (batch, >=2) whose column 0 is the
            image height and column 1 the image width.
        batch_size: unused; the batch size is taken from ``im_shape``.

    Returns:
        The same ``boxes`` tensor, clipped to ``[0, width - 1]`` in x and
        ``[0, height - 1]`` in y for each batch element.
    """
    boxes[boxes < 0] = 0

    # One limit per image, shaped (batch, 1) so that it broadcasts across
    # that image's rois. Comparing against the flat (batch,) vector only
    # worked for a batch of one.
    max_x = (im_shape[:, 1] - 1).type_as(boxes).view(-1, 1)
    max_y = (im_shape[:, 0] - 1).type_as(boxes).view(-1, 1)

    boxes[:, :, 0] = torch.min(boxes[:, :, 0], max_x)
    boxes[:, :, 1] = torch.min(boxes[:, :, 1], max_y)
    boxes[:, :, 2] = torch.min(boxes[:, :, 2], max_x)
    boxes[:, :, 3] = torch.min(boxes[:, :, 3], max_y)

    return boxes
124 | 
def clip_boxes(boxes, im_shape, batch_size):
    """Clamp boxes in place to each image's bounds and return them.

    ``boxes`` is indexed as (batch, num_boxes, 4 * k) with (x1, y1, x2, y2)
    groups interleaved along the last dimension; ``im_shape[i, 0]`` and
    ``im_shape[i, 1]`` give the height and width of image ``i``.
    """
    for img_idx in range(batch_size):
        max_y = im_shape[img_idx, 0] - 1
        max_x = im_shape[img_idx, 1] - 1
        per_image = boxes[img_idx]
        # Columns 0/2 of each group are x coordinates, 1/3 are y.
        for offset, upper in enumerate((max_x, max_y, max_x, max_y)):
            per_image[:, offset::4].clamp_(0, upper)

    return boxes
134 | 
135 | 
def bbox_overlaps(anchors, gt_boxes):
    """
    Pairwise IoU between two sets of boxes.

    anchors: (N, 4) tensor of (x1, y1, x2, y2)
    gt_boxes: (K, 4) tensor of (x1, y1, x2, y2)

    returns: (N, K) tensor of overlaps between anchors and gt_boxes
    """
    num_anchors = anchors.size(0)
    num_gt = gt_boxes.size(0)

    # Areas use the inclusive-pixel (+1) convention.
    area_gt = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1) *
               (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)).view(1, num_gt)
    area_anchor = ((anchors[:, 2] - anchors[:, 0] + 1) *
                   (anchors[:, 3] - anchors[:, 1] + 1)).view(num_anchors, 1)

    # Expand both sets to an (N, K, 4) grid of box pairs.
    lhs = anchors.view(num_anchors, 1, 4).expand(num_anchors, num_gt, 4)
    rhs = gt_boxes.view(1, num_gt, 4).expand(num_anchors, num_gt, 4)

    inter_w = (torch.min(lhs[:, :, 2], rhs[:, :, 2]) -
               torch.max(lhs[:, :, 0], rhs[:, :, 0]) + 1).clamp(min=0)
    inter_h = (torch.min(lhs[:, :, 3], rhs[:, :, 3]) -
               torch.max(lhs[:, :, 1], rhs[:, :, 1]) + 1).clamp(min=0)

    intersection = inter_w * inter_h
    union = area_anchor + area_gt - intersection
    return intersection / union
167 | 
def bbox_overlaps_batch(anchors, gt_boxes):
    """
    Batched pairwise IoU between anchors and ground-truth boxes.

    anchors: (N, 4) tensor shared by the batch, or (b, N, 4) / (b, N, 5)
        tensor; with 5 columns, columns 1..4 are used as the box.
    gt_boxes: (b, K, 5) tensor; only the first four columns are used.

    returns: (b, N, K) tensor of overlaps. Columns of gt boxes whose width
    and height are both 1 are set to 0, then rows of anchors whose width
    and height are both 1 are set to -1.
    """
    num_images = gt_boxes.size(0)
    rank = anchors.dim()

    # Normalise anchors to a contiguous (b, N, 4) tensor.
    if rank == 2:
        num_anchors = anchors.size(0)
        num_gt = gt_boxes.size(1)
        anchors = anchors.view(1, num_anchors, 4).expand(
            num_images, num_anchors, 4).contiguous()
    elif rank == 3:
        num_anchors = anchors.size(1)
        num_gt = gt_boxes.size(1)
        first_col = 0 if anchors.size(2) == 4 else 1
        anchors = anchors[:, :, first_col:first_col + 4].contiguous()
    else:
        raise ValueError('anchors input dimension is not correct.')

    gt_boxes = gt_boxes[:, :, :4].contiguous()

    # Side lengths and areas (inclusive-pixel convention).
    gt_w = gt_boxes[:, :, 2] - gt_boxes[:, :, 0] + 1
    gt_h = gt_boxes[:, :, 3] - gt_boxes[:, :, 1] + 1
    area_gt = (gt_w * gt_h).view(num_images, 1, num_gt)

    anchor_w = anchors[:, :, 2] - anchors[:, :, 0] + 1
    anchor_h = anchors[:, :, 3] - anchors[:, :, 1] + 1
    area_anchor = (anchor_w * anchor_h).view(num_images, num_anchors, 1)

    # Boxes whose width and height are both 1 are masked out below.
    gt_degenerate = (gt_w == 1) & (gt_h == 1)
    anchor_degenerate = (anchor_w == 1) & (anchor_h == 1)

    grid = (num_images, num_anchors, num_gt)
    lhs = anchors.view(num_images, num_anchors, 1, 4).expand(*(grid + (4,)))
    rhs = gt_boxes.view(num_images, 1, num_gt, 4).expand(*(grid + (4,)))

    inter_w = (torch.min(lhs[:, :, :, 2], rhs[:, :, :, 2]) -
               torch.max(lhs[:, :, :, 0], rhs[:, :, :, 0]) + 1).clamp(min=0)
    inter_h = (torch.min(lhs[:, :, :, 3], rhs[:, :, :, 3]) -
               torch.max(lhs[:, :, :, 1], rhs[:, :, :, 1]) + 1).clamp(min=0)

    intersection = inter_w * inter_h
    overlaps = intersection / (area_anchor + area_gt - intersection)

    # mask the overlap here: gt first, then anchors (anchors take priority).
    overlaps.masked_fill_(
        gt_degenerate.view(num_images, 1, num_gt).expand(*grid), 0)
    overlaps.masked_fill_(
        anchor_degenerate.view(num_images, num_anchors, 1).expand(*grid), -1)

    return overlaps
258 | 


--------------------------------------------------------------------------------