├── lib
    ├── model
    │   ├── __init__.py
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── nms
    │   │   │   │   └── __init__.py
    │   │   ├── .gitignore
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── nms_cuda_kernel.h
    │   │   │   ├── nms_cuda.h
    │   │   │   └── nms_cuda_kernel.cu
    │   │   ├── nms_gpu.py
    │   │   ├── nms_wrapper.py
    │   │   ├── build.py
    │   │   ├── nms_cpu.py
    │   │   └── nms_kernel.cu
    │   ├── rpn
    │   │   ├── __init__.py
    │   │   ├── generate_anchors.py
    │   │   └── rpn.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── .gitignore
    │   │   ├── blob.py
    │   │   ├── losses.py
    │   │   ├── logger.py
    │   │   ├── bbox.pyx
    │   │   ├── fsod_logger.py
    │   │   └── net_utils.py
    │   ├── framework
    │   │   └── __init__.py
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align
    │   │   │   │   └── __init__.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_align.py
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── roi_align.h
    │   │   │   ├── roi_align_cuda.h
    │   │   │   ├── roi_align_kernel.h
    │   │   │   ├── roi_align_cuda.c
    │   │   │   ├── roi_align.c
    │   │   │   └── roi_align_kernel.cu
    │   │   └── build.py
    │   ├── roi_crop
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   ├── crop_resize
    │   │   │   │   └── __init__.py
    │   │   │   └── roi_crop
    │   │   │   │   └── __init__.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_crop.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   ├── roi_crop.py
    │   │   │   ├── crop_resize.py
    │   │   │   └── gridgen.py
    │   │   ├── make.sh
    │   │   ├── src
    │   │   │   ├── roi_crop_cuda.h
    │   │   │   ├── roi_crop.h
    │   │   │   ├── roi_crop_cuda_kernel.h
    │   │   │   └── roi_crop_cuda.c
    │   │   └── build.py
    │   ├── roi_pooling
    │   │   ├── __init__.py
    │   │   ├── _ext
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pooling
    │   │   │   │   └── __init__.py
    │   │   ├── functions
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pool.py
    │   │   ├── modules
    │   │   │   ├── __init__.py
    │   │   │   └── roi_pool.py
    │   │   ├── src
    │   │   │   ├── roi_pooling.h
    │   │   │   ├── roi_pooling_cuda.h
    │   │   │   ├── roi_pooling_kernel.h
    │   │   │   ├── roi_pooling_cuda.c
    │   │   │   └── roi_pooling.c
    │   │   └── build.py
    │   ├── roi_layers
    │   │   ├── nms.py
    │   │   ├── __init__.py
    │   │   ├── roi_pool.py
    │   │   └── roi_align.py
    │   └── csrc
    │   │   ├── vision.cpp
    │   │   ├── cpu
    │   │       ├── vision.h
    │   │       └── nms_cpu.cpp
    │   │   ├── nms.h
    │   │   ├── ROIPool.h
    │   │   ├── ROIAlign.h
    │   │   └── cuda
    │   │       ├── vision.h
    │   │       └── nms.cu
    ├── datasets
    │   ├── __init__.py
    │   ├── ds_utils.py
    │   ├── tools
    │   │   └── mcg_munge.py
    │   ├── debug.ipynb
    │   ├── factory.py
    │   ├── vg_eval.py
    │   └── voc_eval.py
    ├── roi_data_layer
    │   ├── __init__.py
    │   ├── general_test_loader.py
    │   ├── minibatch.py
    │   ├── roidb.py
    │   ├── allcls_fs_loader.py
    │   ├── inference_loader.py
    │   └── multiway_loader.py
    └── setup.py
├── images
    ├── prediction.jpg
    └── attention_visualization.jpg
├── .gitignore
├── cfgs
    ├── vgg16.yml
    ├── res101.yml
    ├── res101_ls.yml
    └── res50.yml
├── env.yml
├── inference.py
├── README.md
└── train.py


/lib/model/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/framework/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/lib/model/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/lib/model/utils/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 | 


--------------------------------------------------------------------------------
/images/prediction.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tung-I/Dual-awareness-Attention-for-Few-shot-Object-Detection/HEAD/images/prediction.jpg


--------------------------------------------------------------------------------
/images/attention_visualization.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tung-I/Dual-awareness-Attention-for-Few-shot-Object-Detection/HEAD/images/attention_visualization.jpg


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/lib/model/roi_layers/nms.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | # from ._utils import _C
3 | from model import _C
4 | 
5 | nms = _C.nms
6 | # nms.__doc__ = """
7 | # This function performs Non-maximum suppression"""
8 | 


--------------------------------------------------------------------------------
/lib/model/nms/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling stnm kernels by nvcc..."
 7 | nvcc -c -o nms_cuda_kernel.cu.o nms_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
 5 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 6 |           int boxes_dim, float nms_overlap_thresh);
 7 | 
 8 | #ifdef __cplusplus
 9 | }
10 | #endif
11 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/lib/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/make.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_crop_cuda_kernel.cu.o roi_crop_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_52
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | data/
 2 | models/
 3 | __pycache__/
 4 | lib/datasets/__pycache__/*
 5 | lib/model/utils/__pycache__/*
 6 | lib/roi_data_layer/__pycache__/*
 7 | lib/build/
 8 | lib/pycocotools/
 9 | lib/faster_rcnn.egg-info
10 | lib/model/_C*
11 | output/*
12 | inference_output/*
13 | resnet50*.pth
14 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | // int nms_cuda(THCudaTensor *keep_out, THCudaTensor *num_out,
2 | //             THCudaTensor *boxes_host, THCudaTensor *nms_overlap_thresh);
3 | 
4 | int nms_cuda(THCudaIntTensor *keep_out, THCudaTensor *boxes_host,
5 |              THCudaIntTensor *num_out, float nms_overlap_thresh);
6 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/modules/roi_crop.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_crop import RoICropFunction
3 | 
class _RoICrop(Module):
    """Module wrapper that applies ``RoICropFunction`` to two inputs.

    A new ``RoICropFunction`` instance is created on every forward call.
    """

    def __init__(self, layout = 'BHWD'):
        # `layout` is accepted for the caller's convenience but is neither
        # stored nor consulted anywhere in this class.
        super(_RoICrop, self).__init__()

    def forward(self, input1, input2):
        """Return ``RoICropFunction()(input1, input2)``."""
        crop_fn = RoICropFunction()
        return crop_fn(input1, input2)
9 | 


--------------------------------------------------------------------------------
/cfgs/vgg16.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: vgg16
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   BATCH_SIZE: 256
10 |   LEARNING_RATE: 0.01
11 | TEST:
12 |   HAS_RPN: True
13 | POOLING_MODE: align
14 | CROP_RESIZE_WITH_MAX_POOL: False
15 | 


--------------------------------------------------------------------------------
/lib/model/roi_layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from .nms import nms
 4 | from .roi_align import ROIAlign
 5 | from .roi_align import roi_align
 6 | from .roi_pool import ROIPool
 7 | from .roi_pool import roi_pool
 8 | 
 9 | __all__ = ["nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool"]
10 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_gpu.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | import torch
 3 | import numpy as np
 4 | from ._ext import nms
 5 | import pdb
 6 | 
 7 | def nms_gpu(dets, thresh):
 8 | 	keep = dets.new(dets.size(0), 1).zero_().int()
 9 | 	num_out = dets.new(1).zero_().int()
10 | 	nms.nms_cuda(keep, dets, num_out, thresh)
11 | 	keep = keep[:num_out[0]]
12 | 	return keep
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
2 |                       THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
3 | 
4 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
5 |                       THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/crop_resize/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._crop_resize import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         locals[symbol] = _wrap_function(fn, _ffi)
10 |         __all__.append(symbol)
11 | 
12 | _import_symbols(locals())
13 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 | 
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/cfgs/res101.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   DOUBLE_BIAS: False
13 |   LEARNING_RATE: 0.001
14 | TEST:
15 |   HAS_RPN: True
16 | POOLING_SIZE: 7
17 | POOLING_MODE: align
18 | CROP_RESIZE_WITH_MAX_POOL: False
19 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 | 
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/lib/model/nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/_ext/roi_crop/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_crop import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_align import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.h:
--------------------------------------------------------------------------------
1 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
2 | // we assume BHWD format in inputImages
3 | // we assume BHW(YX) format on grids
4 | 
5 | int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output);
6 | 
7 | int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
8 |                                         THCudaTensor *gradGrids, THCudaTensor *gradOutput);
9 | 


--------------------------------------------------------------------------------
/cfgs/res101_ls.yml:
--------------------------------------------------------------------------------
 1 | EXP_DIR: res101
 2 | TRAIN:
 3 |   HAS_RPN: True
 4 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 5 |   RPN_POSITIVE_OVERLAP: 0.7
 6 |   RPN_BATCHSIZE: 256
 7 |   PROPOSAL_METHOD: gt
 8 |   BG_THRESH_LO: 0.0
 9 |   DISPLAY: 20
10 |   BATCH_SIZE: 128
11 |   WEIGHT_DECAY: 0.0001
12 |   SCALES: [800]
13 |   DOUBLE_BIAS: False
14 |   LEARNING_RATE: 0.001
15 | TEST:
16 |   HAS_RPN: True
17 |   SCALES: [800]
18 |   MAX_SIZE: 1200
19 |   RPN_POST_NMS_TOP_N: 1000
20 | POOLING_SIZE: 7
21 | POOLING_MODE: align
22 | CROP_RESIZE_WITH_MAX_POOL: False
23 | 


--------------------------------------------------------------------------------
/lib/model/csrc/vision.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "nms.h"
 3 | #include "ROIAlign.h"
 4 | #include "ROIPool.h"
 5 | 
 6 | 
// Python bindings for the extension module. The module name comes from the
// TORCH_EXTENSION_NAME macro supplied at build time. Each m.def() exposes
// one C++ function under the given Python name with a short docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // nms dispatcher declared in nms.h
  m.def("nms", &nms, "non-maximum suppression");
  // RoIAlign forward/backward entry points (from ROIAlign.h)
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  // RoIPool forward/backward entry points (from ROIPool.h)
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
}
14 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_pool import RoIPoolFunction
 3 | 
 4 | 
 5 | class _RoIPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 7 |         super(_RoIPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 | 
13 |     def forward(self, features, rois):
14 |         return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
15 | 


--------------------------------------------------------------------------------
/lib/model/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | 
 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
 7 |                                 const at::Tensor& rois,
 8 |                                 const float spatial_scale,
 9 |                                 const int pooled_height,
10 |                                 const int pooled_width,
11 |                                 const int sampling_ratio);
12 | 
13 | 
14 | at::Tensor nms_cpu(const at::Tensor& dets,
15 |                    const at::Tensor& scores,
16 |                    const float threshold);
17 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop.h:
--------------------------------------------------------------------------------
 1 | int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
 2 | 
 3 | int BilinearSamplerBHWD_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
 4 |                                         THFloatTensor *gradGrids, THFloatTensor *gradOutput);
 5 | 
 6 | 
 7 | 
 8 | int BilinearSamplerBCHW_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output);
 9 | 
10 | int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
11 |                                         THFloatTensor *gradGrids, THFloatTensor *gradOutput);
12 | 


--------------------------------------------------------------------------------
/lib/model/csrc/nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include "cpu/vision.h"
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include "cuda/vision.h"
 7 | #endif
 8 | 
 9 | 
// Non-maximum suppression dispatcher.
//
// Routes to the CUDA implementation when `dets` lives on a CUDA device and
// the extension was compiled with WITH_CUDA; otherwise uses nms_cpu.
//
//   dets      - box tensor; concatenated with `scores` along dim 1 on the
//               CUDA path, passed through unchanged on the CPU path.
//   scores    - per-box scores; unsqueezed to a column on the CUDA path.
//   threshold - overlap threshold forwarded to the backend.
//
// Raises (via AT_ERROR) when `dets` is a CUDA tensor but the extension was
// built without WITH_CUDA.
at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {

  if (dets.type().is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    // (the #else branch below already raises in that configuration)
    // Empty input: return an empty int64 index tensor.
    // NOTE(review): this result is placed on the CPU even though `dets`
    // is a CUDA tensor -- confirm callers expect that.
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    // nms_cuda takes a single tensor, so append the scores as a last column.
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  // CPU tensors use the CPU implementation.
  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
}
29 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | import torch
 8 | from model.utils.config import cfg
 9 | if torch.cuda.is_available():
10 |     from model.nms.nms_gpu import nms_gpu
11 | from model.nms.nms_cpu import nms_cpu
12 | 
def nms(dets, thresh, force_cpu=False):
    """Dispatch to either CPU or GPU NMS implementations.

    ``nms_gpu`` is only imported by this module when
    ``torch.cuda.is_available()`` is true, so on a machine without CUDA the
    GPU path would fail with a ``NameError``. In that case the CPU
    implementation is used instead, regardless of ``force_cpu``.

    Args:
        dets: detections; only ``dets.shape[0]`` is inspected here, the rest
            is interpreted by the selected backend.
        thresh: overlap threshold forwarded to the backend.
        force_cpu: when truthy, use ``nms_cpu`` even if CUDA is available.

    Returns:
        An empty list when ``dets`` has no rows, otherwise whatever the
        selected backend returns.
    """
    if dets.shape[0] == 0:
        return []

    if force_cpu or not torch.cuda.is_available():
        return nms_cpu(dets, thresh)
    return nms_gpu(dets, thresh)
22 | 


--------------------------------------------------------------------------------
/cfgs/res50.yml:
--------------------------------------------------------------------------------
 1 | # EXP_DIR: res50
 2 | # TRAIN:
 3 | #   HAS_RPN: True
 4 | #   # IMS_PER_BATCH: 1
 5 | #   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
 6 | #   RPN_POSITIVE_OVERLAP: 0.7
 7 | #   RPN_BATCHSIZE: 256
 8 | #   PROPOSAL_METHOD: gt
 9 | #   BG_THRESH_LO: 0.0
10 | #   DISPLAY: 20
11 | #   BATCH_SIZE: 256
12 | #   WEIGHT_DECAY: 0.0001
13 | #   DOUBLE_BIAS: False
14 | #   SNAPSHOT_PREFIX: res50_faster_rcnn
15 | # TEST:
16 | #   HAS_RPN: True
17 | # POOLING_MODE: crop
18 | 
19 | EXP_DIR: res50
20 | TRAIN:
21 |   HAS_RPN: True
22 |   BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True
23 |   RPN_POSITIVE_OVERLAP: 0.7
24 |   RPN_BATCHSIZE: 256
25 |   PROPOSAL_METHOD: gt
26 |   BG_THRESH_LO: 0.0
27 |   DISPLAY: 20
28 |   BATCH_SIZE: 128
29 |   WEIGHT_DECAY: 0.0001
30 |   DOUBLE_BIAS: False
31 |   LEARNING_RATE: 0.001
32 | TEST:
33 |   HAS_RPN: True
34 | POOLING_SIZE: 7
35 | POOLING_MODE: align
36 | CROP_RESIZE_WITH_MAX_POOL: False
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_POOLING_KERNEL
 2 | #define _ROI_POOLING_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | 
 8 | int ROIPoolForwardLaucher(
 9 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 |     const int width, const int channels, const int pooled_height,
11 |     const int pooled_width, const float* bottom_rois,
12 |     float* top_data, int* argmax_data, cudaStream_t stream);
13 | 
14 | 
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 |     const int height, const int width, const int channels, const int pooled_height,
17 |     const int pooled_width, const float* bottom_rois,
18 |     float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 | 
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 | 
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/lib/model/nms/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | #this_file = os.path.dirname(__file__)
 7 | 
 8 | sources = []
 9 | headers = []
10 | defines = []
11 | with_cuda = False
12 | 
13 | if torch.cuda.is_available():
14 |     print('Including CUDA code.')
15 |     sources += ['src/nms_cuda.c']
16 |     headers += ['src/nms_cuda.h']
17 |     defines += [('WITH_CUDA', None)]
18 |     with_cuda = True
19 | 
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/nms_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | print(extra_objects)
25 | 
26 | ffi = create_extension(
27 |     '_ext.nms',
28 |     headers=headers,
29 |     sources=sources,
30 |     define_macros=defines,
31 |     relative_to=__file__,
32 |     with_cuda=with_cuda,
33 |     extra_objects=extra_objects
34 | )
35 | 
36 | if __name__ == '__main__':
37 |     ffi.build()
38 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_cpu.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | 
 3 | import numpy as np
 4 | import torch
 5 | 
 6 | def nms_cpu(dets, thresh):
 7 |     dets = dets.numpy()
 8 |     x1 = dets[:, 0]
 9 |     y1 = dets[:, 1]
10 |     x2 = dets[:, 2]
11 |     y2 = dets[:, 3]
12 |     scores = dets[:, 4]
13 | 
14 |     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
15 |     order = scores.argsort()[::-1]
16 | 
17 |     keep = []
18 |     while order.size > 0:
19 |         i = order.item(0)
20 |         keep.append(i)
21 |         xx1 = np.maximum(x1[i], x1[order[1:]])
22 |         yy1 = np.maximum(y1[i], y1[order[1:]])
23 |         xx2 = np.maximum(x2[i], x2[order[1:]])
24 |         yy2 = np.maximum(y2[i], y2[order[1:]])
25 | 
26 |         w = np.maximum(0.0, xx2 - xx1 + 1)
27 |         h = np.maximum(0.0, yy2 - yy1 + 1)
28 |         inter = w * h
29 |         ovr = inter / (areas[i] + areas[order[1:]] - inter)
30 | 
31 |         inds = np.where(ovr <= thresh)[0]
32 |         order = order[inds + 1]
33 | 
34 |     return torch.IntTensor(keep)
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | 
 7 | sources = ['src/roi_pooling.c']
 8 | headers = ['src/roi_pooling.h']
 9 | extra_objects = []
10 | defines = []
11 | with_cuda = False
12 | 
13 | this_file = os.path.dirname(os.path.realpath(__file__))
14 | print(this_file)
15 | 
16 | if torch.cuda.is_available():
17 |     print('Including CUDA code.')
18 |     sources += ['src/roi_pooling_cuda.c']
19 |     headers += ['src/roi_pooling_cuda.h']
20 |     defines += [('WITH_CUDA', None)]
21 |     with_cuda = True
22 |     extra_objects = ['src/roi_pooling.cu.o']
23 |     extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | 
25 | ffi = create_extension(
26 |     '_ext.roi_pooling',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | #this_file = os.path.dirname(__file__)
 7 | 
 8 | sources = ['src/roi_crop.c']
 9 | headers = ['src/roi_crop.h']
10 | defines = []
11 | with_cuda = False
12 | 
13 | if torch.cuda.is_available():
14 |     print('Including CUDA code.')
15 |     sources += ['src/roi_crop_cuda.c']
16 |     headers += ['src/roi_crop_cuda.h']
17 |     defines += [('WITH_CUDA', None)]
18 |     with_cuda = True
19 | 
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/roi_crop_cuda_kernel.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | 
25 | ffi = create_extension(
26 |     '_ext.roi_crop',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/build.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import torch
 4 | from torch.utils.ffi import create_extension
 5 | 
 6 | sources = ['src/roi_align.c']
 7 | headers = ['src/roi_align.h']
 8 | extra_objects = []
 9 | #sources = []
10 | #headers = []
11 | defines = []
12 | with_cuda = False
13 | 
14 | this_file = os.path.dirname(os.path.realpath(__file__))
15 | print(this_file)
16 | 
17 | if torch.cuda.is_available():
18 |     print('Including CUDA code.')
19 |     sources += ['src/roi_align_cuda.c']
20 |     headers += ['src/roi_align_cuda.h']
21 |     defines += [('WITH_CUDA', None)]
22 |     with_cuda = True
23 |     
24 |     extra_objects = ['src/roi_align_kernel.cu.o']
25 |     extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
26 | 
27 | ffi = create_extension(
28 |     '_ext.roi_align',
29 |     headers=headers,
30 |     sources=sources,
31 |     define_macros=defines,
32 |     relative_to=__file__,
33 |     with_cuda=with_cuda,
34 |     extra_objects=extra_objects
35 | )
36 | 
37 | if __name__ == '__main__':
38 |     ffi.build()
39 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/roi_crop.py:
--------------------------------------------------------------------------------
 1 | # functions/add.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | import pdb
 6 | 
 7 | class RoICropFunction(Function):
 8 |     def forward(self, input1, input2):
 9 |         self.input1 = input1.clone()
10 |         self.input2 = input2.clone()
11 |         output = input2.new(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2]).zero_()
12 |         assert output.get_device() == input1.get_device(), "output and input1 must on the same device"
13 |         assert output.get_device() == input2.get_device(), "output and input2 must on the same device"
14 |         roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
15 |         return output
16 | 
17 |     def backward(self, grad_output):
18 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
19 |         grad_input2 = self.input2.new(self.input2.size()).zero_()
20 |         roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
21 |         return grad_input1, grad_input2
22 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
// Declarations of the RoI Align CUDA kernels and their host-side launchers.
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

// Use C linkage when this header is pulled into a C++ translation unit.
#ifdef __cplusplus
extern "C" {
#endif

// Forward kernel. bottom_data and bottom_rois are read-only inputs; top_data
// is the only writable buffer.
// NOTE(review): nthreads is presumably the number of output elements -- confirm
// against the kernel definition.
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

// Host entry point for the forward pass; takes the CUDA stream to use.
// NOTE(review): meaning of the int return value is not visible here -- confirm
// in the .cu file.
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

// Backward kernel. top_diff and bottom_rois are read-only inputs; bottom_diff
// is the only writable buffer.
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

// Host entry point for the backward pass. Unlike the forward launcher it also
// receives batch_size.
// NOTE(review): meaning of the int return value is not visible here -- confirm
// in the .cu file.
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
34 | 
35 | 


--------------------------------------------------------------------------------
/lib/datasets/ds_utils.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast/er R-CNN
 3 | # Licensed under The MIT License [see LICENSE for details]
 4 | # Written by Ross Girshick
 5 | # --------------------------------------------------------
 6 | from __future__ import absolute_import
 7 | from __future__ import division
 8 | from __future__ import print_function
 9 | 
10 | import numpy as np
11 | 
12 | 
def unique_boxes(boxes, scale=1.0):
  """Return the sorted row indices of the first occurrence of each distinct box.

  Boxes are multiplied by ``scale`` and rounded, then collapsed to a single
  number per row (dot product with [1, 1e3, 1e6, 1e9]); rows sharing that
  number are treated as duplicates.
  """
  weights = np.array([1, 1e3, 1e6, 1e9])
  rounded = np.round(boxes * scale)
  keys = rounded.dot(weights)
  first_seen = np.unique(keys, return_index=True)[1]
  return np.sort(first_seen)
19 | 
20 | 
def xywh_to_xyxy(boxes):
  """Turn rows of [x y w h] into rows of [x1 y1 x2 y2] (inclusive corners)."""
  top_left = boxes[:, 0:2]
  size = boxes[:, 2:4]
  bottom_right = top_left + size - 1
  return np.hstack((top_left, bottom_right))
24 | 
25 | 
def xyxy_to_xywh(boxes):
  """Turn rows of [x1 y1 x2 y2] (inclusive corners) into rows of [x y w h]."""
  top_left = boxes[:, 0:2]
  bottom_right = boxes[:, 2:4]
  size = bottom_right - top_left + 1
  return np.hstack((top_left, size))
29 | 
30 | 
def validate_boxes(boxes, width=0, height=0):
  """Assert that every [x1 y1 x2 y2] row is well-formed and inside the image.

  Raises AssertionError when a coordinate is negative, a corner pair is
  inverted, or x2/y2 is not strictly below ``width``/``height``.
  """
  x1, y1, x2, y2 = (boxes[:, col] for col in range(4))
  conditions = (
      x1 >= 0,
      y1 >= 0,
      x2 >= x1,
      y2 >= y1,
      x2 < width,
      y2 < height,
  )
  for holds in conditions:
    assert holds.all()
43 | 
44 | 
def filter_small_boxes(boxes, min_size):
  """Return the row indices of boxes that are at least ``min_size`` on both sides.

  Width and height are computed as ``x2 - x1`` and ``y2 - y1``. Both sides use
  the same inclusive comparison; previously height used a strict ``>``, so a
  box exactly ``min_size`` tall was dropped while one exactly ``min_size`` wide
  was kept.
  """
  w = boxes[:, 2] - boxes[:, 0]
  h = boxes[:, 3] - boxes[:, 1]
  keep = np.where((w >= min_size) & (h >= min_size))[0]
  return keep
50 | 


--------------------------------------------------------------------------------
/lib/datasets/tools/mcg_munge.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import os
 3 | import sys
 4 | 
 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from
 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/
 7 | so that it's consistent with those computed by Jan Hosang (see:
 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal-
 9 |   computing/research/object-recognition-and-scene-understanding/how-
10 |   good-are-detection-proposals-really/)
11 | 
12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order.
13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order.
14 | """
15 | 
def munge(src_dir):
    """Move every file in ``src_dir`` into a nested ``MCG/mat`` layout.

    A file named ``<base><ext>`` ends up at
    ``MCG/mat/<base[:14]>/<base[:22]>/<base><ext>`` relative to the current
    working directory, e.g.
    ``MCG/mat/COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat``.
    Each move is logged to stdout.
    """
    for fn in os.listdir(src_dir):
        stem = os.path.splitext(fn)[0]
        # Two directory levels: the first 14 and the first 22 characters of the stem.
        dst_dir = os.path.join('MCG', 'mat', stem[:14], stem[:22])
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)

        src = os.path.join(src_dir, fn)
        dst = os.path.join(dst_dir, fn)
        print('MV: {} -> {}'.format(src, dst))
        os.rename(src, dst)
34 | 
if __name__ == '__main__':
    # The first command-line argument is the directory to reorganise,
    # e.g. 'MCG-COCO-val2014-boxes'.
    munge(sys.argv[1])
40 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/crop_resize.py:
--------------------------------------------------------------------------------
 1 | # functions/add.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | from .._ext import roi_crop
 5 | from cffi import FFI
 6 | ffi = FFI()
 7 | 
 8 | class RoICropFunction(Function):
 9 |     def forward(self, input1, input2):
10 |         self.input1 = input1
11 |         self.input2 = input2
12 |         self.device_c = ffi.new("int *")
13 |         output = torch.zeros(input2.size()[0], input1.size()[1], input2.size()[1], input2.size()[2])
14 |         #print('decice %d' % torch.cuda.current_device())
15 |         if input1.is_cuda:
16 |             self.device = torch.cuda.current_device()
17 |         else:
18 |             self.device = -1
19 |         self.device_c[0] = self.device
20 |         if not input1.is_cuda:
21 |             roi_crop.BilinearSamplerBHWD_updateOutput(input1, input2, output)
22 |         else:
23 |             output = output.cuda(self.device)
24 |             roi_crop.BilinearSamplerBHWD_updateOutput_cuda(input1, input2, output)
25 |         return output
26 | 
27 |     def backward(self, grad_output):
28 |         grad_input1 = torch.zeros(self.input1.size())
29 |         grad_input2 = torch.zeros(self.input2.size())
30 |         #print('backward decice %d' % self.device)
31 |         if not grad_output.is_cuda:
32 |             roi_crop.BilinearSamplerBHWD_updateGradInput(self.input1, self.input2, grad_input1, grad_input2, grad_output)
33 |         else:
34 |             grad_input1 = grad_input1.cuda(self.device)
35 |             grad_input2 = grad_input2.cuda(self.device)
36 |             roi_crop.BilinearSamplerBHWD_updateGradInput_cuda(self.input1, self.input2, grad_input1, grad_input2, grad_output)
37 |         return grad_input1, grad_input2
38 | 


--------------------------------------------------------------------------------
/lib/model/csrc/ROIPool.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | 
11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
12 |                                 const at::Tensor& rois,
13 |                                 const float spatial_scale,
14 |                                 const int pooled_height,
15 |                                 const int pooled_width) {
16 |   if (input.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
19 | #else
20 |     AT_ERROR("Not compiled with GPU support");
21 | #endif
22 |   }
23 |   AT_ERROR("Not implemented on the CPU");
24 | }
25 | 
26 | at::Tensor ROIPool_backward(const at::Tensor& grad,
27 |                                  const at::Tensor& input,
28 |                                  const at::Tensor& rois,
29 |                                  const at::Tensor& argmax,
30 |                                  const float spatial_scale,
31 |                                  const int pooled_height,
32 |                                  const int pooled_width,
33 |                                  const int batch_size,
34 |                                  const int channels,
35 |                                  const int height,
36 |                                  const int width) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/lib/model/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | 
 4 | #include "cpu/vision.h"
 5 | 
 6 | #ifdef WITH_CUDA
 7 | #include "cuda/vision.h"
 8 | #endif
 9 | 
10 | // Interface for Python
11 | at::Tensor ROIAlign_forward(const at::Tensor& input,
12 |                             const at::Tensor& rois,
13 |                             const float spatial_scale,
14 |                             const int pooled_height,
15 |                             const int pooled_width,
16 |                             const int sampling_ratio) {
17 |   if (input.type().is_cuda()) {
18 | #ifdef WITH_CUDA
19 |     return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
20 | #else
21 |     AT_ERROR("Not compiled with GPU support");
22 | #endif
23 |   }
24 |   return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
25 | }
26 | 
27 | at::Tensor ROIAlign_backward(const at::Tensor& grad,
28 |                              const at::Tensor& rois,
29 |                              const float spatial_scale,
30 |                              const int pooled_height,
31 |                              const int pooled_width,
32 |                              const int batch_size,
33 |                              const int channels,
34 |                              const int height,
35 |                              const int width,
36 |                              const int sampling_ratio) {
37 |   if (grad.type().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from torch.nn.functional import avg_pool2d, max_pool2d
 3 | from ..functions.roi_align import RoIAlignFunction
 4 | 
 5 | 
 6 | class RoIAlign(Module):
 7 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 8 |         super(RoIAlign, self).__init__()
 9 | 
10 |         self.aligned_width = int(aligned_width)
11 |         self.aligned_height = int(aligned_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois):
15 |         return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 |                                 self.spatial_scale)(features, rois)
17 | 
class RoIAlignAvg(Module):
    """RoI align at one extra row/column, followed by 2x2 average pooling (stride 1)."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Store the output size (coerced to int) and spatial scale (coerced to float)."""
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Align to (height + 1, width + 1), then average-pool the result."""
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align(features, rois)
        return avg_pool2d(oversized, kernel_size=2, stride=1)
30 | 
class RoIAlignMax(Module):
    """RoI align at one extra row/column, followed by 2x2 max pooling (stride 1)."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        """Store the output size (coerced to int) and spatial scale (coerced to float)."""
        super(RoIAlignMax, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Align to (height + 1, width + 1), then max-pool the result."""
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align(features, rois)
        return max_pool2d(oversized, kernel_size=2, stride=1)
43 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_pooling
 4 | import pdb
 5 | 
 6 | class RoIPoolFunction(Function):
 7 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale):
 8 |         ctx.pooled_width = pooled_width
 9 |         ctx.pooled_height = pooled_height
10 |         ctx.spatial_scale = spatial_scale
11 |         ctx.feature_size = None
12 | 
13 |     def forward(ctx, features, rois): 
14 |         ctx.feature_size = features.size()           
15 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
16 |         num_rois = rois.size(0)
17 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 |         ctx.rois = rois
20 |         if not features.is_cuda:
21 |             _features = features.permute(0, 2, 3, 1)
22 |             roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 |                                             _features, rois, output)
24 |         else:
25 |             roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
26 |                                                  features, rois, output, ctx.argmax)
27 | 
28 |         return output
29 | 
30 |     def backward(ctx, grad_output):
31 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
32 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
33 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
34 | 
35 |         roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
36 |                                               grad_output, ctx.rois, grad_input, ctx.argmax)
37 | 
38 |         return grad_input, None
39 | 


--------------------------------------------------------------------------------
/lib/model/utils/blob.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick
 6 | # --------------------------------------------------------
 7 | 
 8 | """Blob helper functions."""
 9 | 
10 | import numpy as np
11 | # from scipy.misc import imread, imresize
12 | import cv2
13 | 
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3


def im_list_to_blob(ims):
    """Stack a list of images into one zero-padded float32 array.

    The result has shape (len(ims), max height, max width, 3); each image is
    copied into the top-left corner of its slot and the remainder stays zero.
    Images are expected to be fully preprocessed already (mean subtraction,
    channel order, ...).
    """
    max_shape = np.array([im.shape for im in ims]).max(axis=0)
    blob = np.zeros((len(ims), max_shape[0], max_shape[1], 3),
                    dtype=np.float32)
    for slot, im in enumerate(ims):
        rows, cols = im.shape[0], im.shape[1]
        blob[slot, 0:rows, 0:cols, :] = im

    return blob
34 | 
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Subtract the pixel means and rescale so the shorter side is ``target_size``.

    ``max_size`` is accepted but not used: the cap on the longer side is
    disabled in this version. The conversion uses ``copy=False`` and the mean
    subtraction is in place, so an input that is already float32 is modified.

    Returns:
        (resized image, scale factor applied to both axes)
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    shorter_side = min(im.shape[0:2])
    im_scale = float(target_size) / float(shorter_side)

    resized = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    return resized, im_scale
53 | 


--------------------------------------------------------------------------------
/lib/setup.py:
--------------------------------------------------------------------------------
 1 | import glob
 2 | import os
 3 | 
 4 | import torch
 5 | from setuptools import find_packages
 6 | from setuptools import setup
 7 | from torch.utils.cpp_extension import CUDA_HOME
 8 | from torch.utils.cpp_extension import CppExtension
 9 | from torch.utils.cpp_extension import CUDAExtension
10 | 
11 | requirements = ["torch", "torchvision"]
12 | 
13 | 
def get_extensions():
    """Describe the ``model._C`` extension built from ``model/csrc``.

    C++ sources at the top level and under ``cpu/`` are always included.
    When CUDA is available and CUDA_HOME is set, the ``cuda/*.cu`` sources,
    the WITH_CUDA macro and the nvcc flags are added and a CUDAExtension is
    produced instead of a CppExtension.

    Returns:
        A single-element list holding the extension object.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csrc_dir = os.path.join(here, "model", "csrc")

    def _find(*pattern):
        # Glob relative to the csrc directory.
        return glob.glob(os.path.join(csrc_dir, *pattern))

    top_level = _find("*.cpp")
    cpu_only = _find("cpu", "*.cpp")
    cuda_only = _find("cuda", "*.cu")

    sources = top_level + cpu_only
    ext_cls = CppExtension
    compile_args = {"cxx": []}
    macros = []

    if torch.cuda.is_available() and CUDA_HOME is not None:
        ext_cls = CUDAExtension
        sources += cuda_only
        macros += [("WITH_CUDA", None)]
        compile_args["nvcc"] = [
            "-DCUDA_HAS_FP16=1",
            "-D__CUDA_NO_HALF_OPERATORS__",
            "-D__CUDA_NO_HALF_CONVERSIONS__",
            "-D__CUDA_NO_HALF2_OPERATORS__",
        ]

    sources = [os.path.join(csrc_dir, path) for path in sources]

    return [
        ext_cls(
            "model._C",
            sources,
            include_dirs=[csrc_dir],
            define_macros=macros,
            extra_compile_args=compile_args,
        )
    ]
54 | 
55 | 
# Package definition. Runs at module level, so executing this file calls
# get_extensions() and hands the result to setuptools.
setup(
    name="faster_rcnn",
    version="0.1",
    description="object detection in pytorch",
    # Discover packages next to this file, leaving out "configs" and "tests".
    packages=find_packages(exclude=("configs", "tests",)),
    # install_requires=requirements,
    ext_modules=get_extensions(),
    # Build extensions through torch's BuildExtension command.
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
)
65 | 


--------------------------------------------------------------------------------
/lib/model/roi_layers/roi_pool.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from model import _C
 9 | 
10 | 
class _ROIPool(Function):
    """Autograd function bridging to the compiled RoI pooling ops in ``_C``."""

    @staticmethod
    def forward(ctx, input, roi, output_size, spatial_scale):
        """Pool ``input`` over ``roi`` and remember what backward needs.

        ``output_size`` may be an int or a pair; it is normalised with
        ``_pair`` and the normalised value is what gets passed to the op.
        Previously the raw argument was indexed, so an int raised TypeError
        even though the normalised pair had just been stored.
        """
        output_size = _pair(output_size)
        ctx.output_size = output_size
        ctx.spatial_scale = spatial_scale
        ctx.input_shape = input.size()
        output, argmax = _C.roi_pool_forward(
            input, roi, spatial_scale, output_size[0], output_size[1]
        )
        ctx.save_for_backward(input, roi, argmax)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return the gradient for ``input``; the other three inputs get None."""
        input, rois, argmax = ctx.saved_tensors
        output_size = ctx.output_size
        spatial_scale = ctx.spatial_scale
        bs, ch, h, w = ctx.input_shape
        grad_input = _C.roi_pool_backward(
            grad_output,
            input,
            rois,
            argmax,
            spatial_scale,
            output_size[0],
            output_size[1],
            bs,
            ch,
            h,
            w,
        )
        return grad_input, None, None, None


roi_pool = _ROIPool.apply
47 | 
48 | 
class ROIPool(nn.Module):
    """Module form of ``roi_pool`` with a fixed output size and spatial scale."""

    def __init__(self, output_size, spatial_scale):
        super(ROIPool, self).__init__()
        self.output_size = output_size
        self.spatial_scale = spatial_scale

    def forward(self, input, rois):
        """Apply ``roi_pool`` using the configuration stored on the module."""
        return roi_pool(input, rois, self.output_size, self.spatial_scale)

    def __repr__(self):
        """Render as ``ClassName(output_size=..., spatial_scale=...)``."""
        pieces = [
            self.__class__.__name__,
            "(",
            "output_size=",
            str(self.output_size),
            ", spatial_scale=",
            str(self.spatial_scale),
            ")",
        ]
        return "".join(pieces)
64 | 


--------------------------------------------------------------------------------
/lib/model/utils/losses.py:
--------------------------------------------------------------------------------
 1 | class TripletLoss(nn.Module):
 2 |     """Triplet loss with hard positive/negative mining.
 3 |     
 4 |     Reference:
 5 |         Hermans et al. In Defense of the Triplet Loss for Person Re-Identification. arXiv:1703.07737.
 6 |     
 7 |     Imported from `<https://github.com/Cysu/open-reid/blob/master/reid/loss/triplet.py>`_.
 8 |     
 9 |     Args:
10 |         margin (float, optional): margin for triplet. Default is 0.3.
11 |     """
12 |     
13 |     def __init__(self, margin=0.3,global_feat, labels):
14 |         super(TripletLoss, self).__init__()
15 |         self.margin = margin
16 |         self.ranking_loss = nn.MarginRankingLoss(margin=margin)
17 |  
18 |     def forward(self, inputs, targets):
19 |         """
20 |         Args:
21 |             inputs (torch.Tensor): feature matrix with shape (batch_size, feat_dim).
22 |             targets (torch.LongTensor): ground truth labels with shape (num_classes).
23 |         """
24 |         n = inputs.size(0)
25 |         
26 |         # Compute pairwise distance, replace by the official when merged
27 |         dist = torch.pow(inputs, 2).sum(dim=1, keepdim=True).expand(n, n)
28 |         dist = dist + dist.t()
29 |         dist.addmm_(1, -2, inputs, inputs.t())
30 |         dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
31 |         
32 |         # For each anchor, find the hardest positive and negative
33 |         mask = targets.expand(n, n).eq(targets.expand(n, n).t())
34 |         dist_ap, dist_an = [], []
35 |         for i in range(n):
36 |             dist_ap.append(dist[i][mask[i]].max().unsqueeze(0))
37 |             dist_an.append(dist[i][mask[i] == 0].min().unsqueeze(0))
38 |         dist_ap = torch.cat(dist_ap)
39 |         dist_an = torch.cat(dist_an)
40 |         
41 |         # Compute ranking hinge loss
42 |         y = torch.ones_like(dist_an)
43 |         return self.ranking_loss(dist_an, dist_ap, y)
44 | 
# Example use of the built-in nn.TripletMarginLoss on random (100, 128)
# anchor / positive / negative batches.
# NOTE(review): these statements run at import time -- confirm whether the
# snippet is meant to stay in this module.
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
anchor = torch.randn(100, 128, requires_grad=True)
positive = torch.randn(100, 128, requires_grad=True)
negative = torch.randn(100, 128, requires_grad=True)
output = triplet_loss(anchor, positive, negative)


--------------------------------------------------------------------------------
/lib/model/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_align
 4 | 
 5 | 
 6 | # TODO use save_for_backward instead
 7 | class RoIAlignFunction(Function):  # instance-style autograd Function around the compiled roi_align extension; state is kept on self between forward() and backward()
 8 |     def __init__(self, aligned_height, aligned_width, spatial_scale):  # record the output grid size (as int) and the scale factor handed to the extension (as float)
 9 |         self.aligned_width = int(aligned_width)
10 |         self.aligned_height = int(aligned_height)
11 |         self.spatial_scale = float(spatial_scale)
12 |         self.rois = None  # set by forward(), reused by backward()
13 |         self.feature_size = None  # set by forward() to features.size(); backward() asserts it is not None
14 | 
15 |     def forward(self, features, rois):  # returns a (num_rois, num_channels, aligned_height, aligned_width) tensor
16 |         self.rois = rois
17 |         self.feature_size = features.size()
18 | 
19 |         batch_size, num_channels, data_height, data_width = features.size()  # features must be 4-D; only num_channels is used below
20 |         num_rois = rois.size(0)
21 | 
22 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()  # zero-filled buffer of the same tensor type as features
23 |         if features.is_cuda:  # choose the CUDA or the CPU entry point of the extension
24 |             roi_align.roi_align_forward_cuda(self.aligned_height,
25 |                                              self.aligned_width,
26 |                                              self.spatial_scale, features,
27 |                                              rois, output)  # output is passed in as the destination buffer; the call's return value is ignored
28 |         else:
29 |             roi_align.roi_align_forward(self.aligned_height,
30 |                                         self.aligned_width,
31 |                                         self.spatial_scale, features,
32 |                                         rois, output)  # same calling convention as the CUDA branch
33 | #            raise NotImplementedError
34 | 
35 |         return output
36 | 
37 |     def backward(self, grad_output):  # returns (gradient w.r.t. features, None); rois receive no gradient
38 |         assert(self.feature_size is not None and grad_output.is_cuda)  # requires a prior forward() and a CUDA gradient: there is no CPU branch here even though forward() has one
39 | 
40 |         batch_size, num_channels, data_height, data_width = self.feature_size
41 | 
42 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
43 |                                   data_width).zero_()  # allocated via self.rois.new, so it takes the tensor type of rois, with the shape recorded from features
44 |         roi_align.roi_align_backward_cuda(self.aligned_height,
45 |                                           self.aligned_width,
46 |                                           self.spatial_scale, grad_output,
47 |                                           self.rois, grad_input)  # grad_input is passed in as the destination buffer
48 | 
49 |         # print grad_input
50 | 
51 |         return grad_input, None
52 | 


--------------------------------------------------------------------------------
/lib/model/csrc/cuda/vision.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | 
 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input,  // declaration only: the definition is not part of this header
 7 |                                  const at::Tensor& rois,
 8 |                                  const float spatial_scale,
 9 |                                  const int pooled_height,
10 |                                  const int pooled_width,
11 |                                  const int sampling_ratio);
12 | 
13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad,  // takes the input's batch_size/channels/height/width as plain ints instead of the input tensor
14 |                                   const at::Tensor& rois,
15 |                                   const float spatial_scale,
16 |                                   const int pooled_height,
17 |                                   const int pooled_width,
18 |                                   const int batch_size,
19 |                                   const int channels,
20 |                                   const int height,
21 |                                   const int width,
22 |                                   const int sampling_ratio);
23 | 
24 | 
25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input,  // returns two tensors; NOTE(review): presumably (output, argmax) since ROIPool_backward_cuda takes an argmax tensor - confirm in the definition
26 |                                 const at::Tensor& rois,
27 |                                 const float spatial_scale,
28 |                                 const int pooled_height,
29 |                                 const int pooled_width);
30 | 
31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad,  // unlike the ROIAlign backward, this one also receives the input tensor and an argmax tensor
32 |                                  const at::Tensor& input,
33 |                                  const at::Tensor& rois,
34 |                                  const at::Tensor& argmax,
35 |                                  const float spatial_scale,
36 |                                  const int pooled_height,
37 |                                  const int pooled_width,
38 |                                  const int batch_size,
39 |                                  const int channels,
40 |                                  const int height,
41 |                                  const int width);
42 | 
43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);  // note: boxes is taken by value here, not by const reference like the tensors above
44 | 
45 | 
46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes,  // NOTE(review): semantics not visible from this header; see the definition
47 |                              const int height,
48 |                              const int width);
49 | 


--------------------------------------------------------------------------------
/lib/model/roi_layers/roi_align.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | import torch
 3 | from torch import nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | 
 8 | from model import _C
 9 | 
10 | import pdb
11 | 
12 | class _ROIAlign(Function):
13 |     @staticmethod
14 |     def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio):
15 |         ctx.save_for_backward(roi)
16 |         ctx.output_size = _pair(output_size)
17 |         ctx.spatial_scale = spatial_scale
18 |         ctx.sampling_ratio = sampling_ratio
19 |         ctx.input_shape = input.size()
20 |         output = _C.roi_align_forward(input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio)
21 |         return output
22 | 
23 |     @staticmethod
24 |     @once_differentiable
25 |     def backward(ctx, grad_output):
26 |         rois, = ctx.saved_tensors
27 |         output_size = ctx.output_size
28 |         spatial_scale = ctx.spatial_scale
29 |         sampling_ratio = ctx.sampling_ratio
30 |         bs, ch, h, w = ctx.input_shape
31 |         grad_input = _C.roi_align_backward(
32 |             grad_output,
33 |             rois,
34 |             spatial_scale,
35 |             output_size[0],
36 |             output_size[1],
37 |             bs,
38 |             ch,
39 |             h,
40 |             w,
41 |             sampling_ratio,
42 |         )
43 |         return grad_input, None, None, None, None
44 | 
45 | 
46 | roi_align = _ROIAlign.apply  # functional entry point: call as roi_align(input, rois, output_size, spatial_scale, sampling_ratio)
47 | 
48 | 
49 | class ROIAlign(nn.Module):  # nn.Module wrapper that stores the three settings and forwards to roi_align; it defines no parameters of its own
50 |     def __init__(self, output_size, spatial_scale, sampling_ratio):  # values are stored as given, without validation or conversion
51 |         super(ROIAlign, self).__init__()
52 |         self.output_size = output_size
53 |         self.spatial_scale = spatial_scale
54 |         self.sampling_ratio = sampling_ratio
55 | 
56 |     def forward(self, input, rois):  # returns roi_align(...) called with the stored settings
57 |         return roi_align(
58 |             input, rois, self.output_size, self.spatial_scale, self.sampling_ratio
59 |         )
60 | 
61 |     def __repr__(self):  # e.g. "ROIAlign(output_size=..., spatial_scale=..., sampling_ratio=...)"
62 |         tmpstr = self.__class__.__name__ + "("
63 |         tmpstr += "output_size=" + str(self.output_size)
64 |         tmpstr += ", spatial_scale=" + str(self.spatial_scale)
65 |         tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
66 |         tmpstr += ")"
67 |         return tmpstr
68 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/functions/gridgen.py:
--------------------------------------------------------------------------------
 1 | # functions/gridgen.py
 2 | import torch
 3 | from torch.autograd import Function
 4 | import numpy as np
 5 | 
 6 | 
 7 | class AffineGridGenFunction(Function):
 8 |     def __init__(self, height, width,lr=1):
 9 |         super(AffineGridGenFunction, self).__init__()
10 |         self.lr = lr
11 |         self.height, self.width = height, width
12 |         self.grid = np.zeros( [self.height, self.width, 3], dtype=np.float32)
13 |         self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height)), 0), repeats = self.width, axis = 0).T, 0)
14 |         self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width)), 0), repeats = self.height, axis = 0), 0)
15 |         # self.grid[:,:,0] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.height - 1)), 0), repeats = self.width, axis = 0).T, 0)
16 |         # self.grid[:,:,1] = np.expand_dims(np.repeat(np.expand_dims(np.arange(-1, 1, 2.0/(self.width - 1)), 0), repeats = self.height, axis = 0), 0)
17 |         self.grid[:,:,2] = np.ones([self.height, width])
18 |         self.grid = torch.from_numpy(self.grid.astype(np.float32))
19 |         #print(self.grid)
20 | 
21 |     def forward(self, input1):
22 |         self.input1 = input1
23 |         output = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
24 |         self.batchgrid = input1.new(torch.Size([input1.size(0)]) + self.grid.size()).zero_()
25 |         for i in range(input1.size(0)):
26 |             self.batchgrid[i] = self.grid.astype(self.batchgrid[i])
27 | 
28 |         # if input1.is_cuda:
29 |         #    self.batchgrid = self.batchgrid.cuda()
30 |         #    output = output.cuda()
31 | 
32 |         for i in range(input1.size(0)):
33 |             output = torch.bmm(self.batchgrid.view(-1, self.height*self.width, 3), torch.transpose(input1, 1, 2)).view(-1, self.height, self.width, 2)
34 | 
35 |         return output
36 | 
37 |     def backward(self, grad_output):
38 | 
39 |         grad_input1 = self.input1.new(self.input1.size()).zero_()
40 | 
41 |         # if grad_output.is_cuda:
42 |         #    self.batchgrid = self.batchgrid.cuda()
43 |         #    grad_input1 = grad_input1.cuda()
44 | 
45 |         grad_input1 = torch.baddbmm(grad_input1, torch.transpose(grad_output.view(-1, self.height*self.width, 2), 1,2), self.batchgrid.view(-1, self.height*self.width, 3))
46 |         return grad_input1
47 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_align_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,  // returns 1 after launching the forward kernel, 0 if rois does not have exactly 5 columns
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 9 | {
10 |     // Grab raw float pointers to the three tensors (taken before any validation)
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 | 
16 |     // Number of ROIs
17 |     int num_rois = THCudaTensor_size(state, rois, 0);
18 |     int size_rois = THCudaTensor_size(state, rois, 1);
19 |     if (size_rois != 5)  // the only input check performed; no error is raised, the caller must inspect the return value
20 |     {
21 |         return 0;
22 |     }
23 | 
24 |     // data height
25 |     int data_height = THCudaTensor_size(state, features, 2);
26 |     // data width
27 |     int data_width = THCudaTensor_size(state, features, 3);
28 |     // Number of channels
29 |     int num_channels = THCudaTensor_size(state, features, 1);
30 | 
31 |     cudaStream_t stream = THCState_getCurrentStream(state);
32 | 
33 |     ROIAlignForwardLaucher(  // the launcher's own return value is ignored
34 |         data_flat, spatial_scale, num_rois, data_height,
35 |         data_width, num_channels, aligned_height,
36 |         aligned_width, rois_flat,
37 |         output_flat, stream);
38 | 
39 |     return 1;
40 | }
41 | 
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,  // returns 1 after launching the backward kernel, 0 if rois does not have exactly 5 columns
43 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 |     // Grab raw float pointers to the three tensors (taken before any validation)
46 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 |     float * rois_flat = THCudaTensor_data(state, rois);
48 | 
49 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 | 
51 |     // Number of ROIs
52 |     int num_rois = THCudaTensor_size(state, rois, 0);
53 |     int size_rois = THCudaTensor_size(state, rois, 1);
54 |     if (size_rois != 5)  // the only input check performed; no error is raised, the caller must inspect the return value
55 |     {
56 |         return 0;
57 |     }
58 | 
59 |     // batch size (all dimensions below are read from bottom_grad, the gradient buffer being filled)
60 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 |     // data height
62 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 |     // data width
64 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 |     // Number of channels
66 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 | 
68 |     cudaStream_t stream = THCState_getCurrentStream(state);
69 |     ROIAlignBackwardLaucher(  // the launcher's own return value is ignored
70 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 |         data_width, num_channels, aligned_height,
72 |         aligned_width, rois_flat,
73 |         bottom_grad_flat, stream);
74 | 
75 |     return 1;
76 | }
77 | 


--------------------------------------------------------------------------------
/lib/model/utils/logger.py:
--------------------------------------------------------------------------------
 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
 2 | import tensorflow as tf
 3 | import numpy as np
 4 | import scipy.misc 
 5 | try:
 6 |     from StringIO import StringIO  # Python 2.7
 7 | except ImportError:
 8 |     from io import BytesIO         # Python 3.x
 9 | 
10 | 
11 | class Logger(object):  # writes scalar, image and histogram summaries through a tf.summary.FileWriter; NOTE(review): tf.summary.FileWriter / tf.Summary look like the TF 1.x API - confirm the pinned TensorFlow version
12 |     
13 |     def __init__(self, log_dir):
14 |         """Create a summary writer logging to log_dir."""
15 |         self.writer = tf.summary.FileWriter(log_dir)
16 | 
17 |     def scalar_summary(self, tag, value, step):
18 |         """Log a scalar variable."""
19 |         summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
20 |         self.writer.add_summary(summary, step)  # no explicit flush here; only histo_summary calls self.writer.flush()
21 | 
22 |     def image_summary(self, tag, images, step):
23 |         """Log a list of images."""
24 | 
25 |         img_summaries = []
26 |         for i, img in enumerate(images):  # each image is logged under the tag "<tag>/<index>"
27 |             # Write the image to a string
28 |             try:  # NOTE(review): the module-level import binds only one of StringIO/BytesIO, so on Python 3 this relies on the NameError from StringIO being swallowed by the bare except below
29 |                 s = StringIO()
30 |             except:
31 |                 s = BytesIO()
32 |             scipy.misc.toimage(img).save(s, format="png")  # PNG-encode into the in-memory buffer; NOTE(review): scipy.misc.toimage is not available in newer SciPy releases - confirm the pinned version
33 | 
34 |             # Create an Image object
35 |             img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
36 |                                        height=img.shape[0],
37 |                                        width=img.shape[1])  # img must expose .shape with height first, width second
38 |             # Create a Summary value
39 |             img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
40 | 
41 |         # Create and write Summary
42 |         summary = tf.Summary(value=img_summaries)  # all images of this call go into a single Summary
43 |         self.writer.add_summary(summary, step)
44 |         
45 |     def histo_summary(self, tag, values, step, bins=1000):
46 |         """Log a histogram of the tensor of values."""
47 | 
48 |         # Create a histogram using numpy
49 |         counts, bin_edges = np.histogram(values, bins=bins)
50 | 
51 |         # Fill the fields of the histogram proto
52 |         hist = tf.HistogramProto()
53 |         hist.min = float(np.min(values))
54 |         hist.max = float(np.max(values))
55 |         hist.num = int(np.prod(values.shape))  # values must expose .shape (e.g. a numpy array); a plain list would fail here
56 |         hist.sum = float(np.sum(values))
57 |         hist.sum_squares = float(np.sum(values**2))
58 | 
59 |         # Drop the start of the first bin
60 |         bin_edges = bin_edges[1:]  # np.histogram returns bins+1 edges; after this slice there is one limit per count
61 | 
62 |         # Add bin edges and counts
63 |         for edge in bin_edges:
64 |             hist.bucket_limit.append(edge)
65 |         for c in counts:
66 |             hist.bucket.append(c)
67 | 
68 |         # Create and write Summary
69 |         summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
70 |         self.writer.add_summary(summary, step)
71 |         self.writer.flush()  # the only method of this class that flushes the writer
72 | 


--------------------------------------------------------------------------------
/lib/model/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include "cpu/vision.h"
 3 | 
 4 | 
 5 | template <typename scalar_t>  // scalar_t is the element type of dets; chosen by the dispatch macro in nms_cpu
 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,  // returns a 1-D tensor with the indices of the boxes that were not suppressed
 7 |                           const at::Tensor& scores,
 8 |                           const float threshold) {
 9 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
10 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
11 |   AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");
12 | 
13 |   if (dets.numel() == 0) {  // empty input: return an empty int64 index tensor
14 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
15 |   }
16 | 
17 |   auto x1_t = dets.select(1, 0).contiguous();  // columns 0..3 of dets are read as x1, y1, x2, y2
18 |   auto y1_t = dets.select(1, 1).contiguous();
19 |   auto x2_t = dets.select(1, 2).contiguous();
20 |   auto y2_t = dets.select(1, 3).contiguous();
21 | 
22 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);  // box areas; widths and heights are computed with a +1
23 | 
24 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));  // box indices sorted by descending score
25 | 
26 |   auto ndets = dets.size(0);
27 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));  // one flag per box: 1 = suppressed
28 | 
29 |   auto suppressed = suppressed_t.data<uint8_t>();
30 |   auto order = order_t.data<int64_t>();
31 |   auto x1 = x1_t.data<scalar_t>();
32 |   auto y1 = y1_t.data<scalar_t>();
33 |   auto x2 = x2_t.data<scalar_t>();
34 |   auto y2 = y2_t.data<scalar_t>();
35 |   auto areas = areas_t.data<scalar_t>();
36 | 
37 |   for (int64_t _i = 0; _i < ndets; _i++) {  // visit boxes from highest to lowest score
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1)
40 |       continue;
41 |     auto ix1 = x1[i];
42 |     auto iy1 = y1[i];
43 |     auto ix2 = x2[i];
44 |     auto iy2 = y2[i];
45 |     auto iarea = areas[i];
46 | 
47 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {  // compare box i against every lower-scored box that is still alive
48 |       auto j = order[_j];
49 |       if (suppressed[j] == 1)
50 |         continue;
51 |       auto xx1 = std::max(ix1, x1[j]);  // corners of the intersection rectangle
52 |       auto yy1 = std::max(iy1, y1[j]);
53 |       auto xx2 = std::min(ix2, x2[j]);
54 |       auto yy2 = std::min(iy2, y2[j]);
55 | 
56 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);  // clamped to 0 when the boxes do not overlap
57 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
58 |       auto inter = w * h;
59 |       auto ovr = inter / (iarea + areas[j] - inter);  // intersection over union
60 |       if (ovr >= threshold)  // suppression is inclusive: an overlap equal to the threshold is suppressed
61 |         suppressed[j] = 1;
62 |    }
63 |   }
64 |   return at::nonzero(suppressed_t == 0).squeeze(1);  // positions of the surviving boxes in the suppressed flag array
65 | }
66 | 
67 | at::Tensor nms_cpu(const at::Tensor& dets,  // entry point: dispatches on the floating-point type of dets and returns nms_cpu_kernel's result
68 |                const at::Tensor& scores,
69 |                const float threshold) {
70 |   at::Tensor result;
71 |   AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] {  // scalar_t is defined by the macro inside the lambda
72 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
73 |   });
74 |   return result;
75 | }
76 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
 5 | // Declaration only. Per the inline comments, sizes are passed as (channels, width, height, batch) for the output, then sizes for inputImages, then a raw float pointer plus four strides for each tensor, then the stream.
 6 | int BilinearSamplerBHWD_updateOutput_cuda_kernel(/*output->size[3]*/int oc,
 7 |                                                  /*output->size[2]*/int ow,
 8 |                                                  /*output->size[1]*/int oh,
 9 |                                                  /*output->size[0]*/int ob,
10 |                                                  /*THCudaTensor_size(state, inputImages, 3)*/int ic,
11 |                                                  /*THCudaTensor_size(state, inputImages, 1)*/int ih,
12 |                                                  /*THCudaTensor_size(state, inputImages, 2)*/int iw,
13 |                                                  /*THCudaTensor_size(state, inputImages, 0)*/int ib,
14 |                                                  /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
15 |                                                  /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
16 |                                                  /*THCudaTensor *output*/float *output, int osb, int osc, int osh, int osw,
17 |                                                  /*THCState_getCurrentStream(state)*/cudaStream_t stream);
18 | // Declaration only. Same layout as above, with gradOutput sizes first and additional pointer/stride groups for gradInputImages, gradGrids and gradOutput.
19 | int BilinearSamplerBHWD_updateGradInput_cuda_kernel(/*gradOutput->size[3]*/int goc,
20 |                                                     /*gradOutput->size[2]*/int gow,
21 |                                                     /*gradOutput->size[1]*/int goh,
22 |                                                     /*gradOutput->size[0]*/int gob,
23 |                                                     /*THCudaTensor_size(state, inputImages, 3)*/int ic,
24 |                                                     /*THCudaTensor_size(state, inputImages, 1)*/int ih,
25 |                                                     /*THCudaTensor_size(state, inputImages, 2)*/int iw,
26 |                                                     /*THCudaTensor_size(state, inputImages, 0)*/int ib,
27 |                                                     /*THCudaTensor *inputImages*/float *inputImages, int isb, int isc, int ish, int isw,
28 |                                                     /*THCudaTensor *grids*/float *grids, int gsb, int gsc, int gsh, int gsw,
29 |                                                     /*THCudaTensor *gradInputImages*/float *gradInputImages, int gisb, int gisc, int gish, int gisw,
30 |                                                     /*THCudaTensor *gradGrids*/float *gradGrids, int ggsb, int ggsc, int ggsh, int ggsw,
31 |                                                     /*THCudaTensor *gradOutput*/float *gradOutput, int gosb, int gosc, int gosh, int gosw,
32 |                                                     /*THCState_getCurrentStream(state)*/cudaStream_t stream);
33 | 
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/general_test_loader.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data as data
 2 | import torch
 3 | import numpy as np
 4 | import random
 5 | import cv2
 6 | from PIL import Image
 7 | from torch.utils.data.sampler import Sampler
 8 | 
 9 | from model.utils.config import cfg
10 | from model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
11 | from roi_data_layer.minibatch import get_minibatch, get_minibatch
12 | 
13 | 
14 | class GeneralTestLoader(data.Dataset):  # Dataset that turns one roidb entry per index into (data, im_info, gt_boxes, num_boxes)
15 |     def __init__(self, roidb, ratio_list, ratio_index, batch_size, training=True, normalize=None):  # stores the arguments and precomputes one target ratio per sample
16 |         self._roidb = roidb
17 |         # we make the height of image consistent to trim_height, trim_width
18 |         self.trim_height = cfg.TRAIN.TRIM_HEIGHT
19 |         self.trim_width = cfg.TRAIN.TRIM_WIDTH
20 |         self.max_num_box = cfg.MAX_NUM_GT_BOXES
21 |         self.training = training  # stored but not read by the methods of this class
22 |         self.normalize = normalize  # stored but not read by the methods of this class
23 |         self.ratio_list = ratio_list
24 |         self.ratio_index = ratio_index
25 |         self.batch_size = batch_size
26 |         self.data_size = len(self.ratio_list)
27 | 
28 |         # given the ratio_list, we want to make the ratio same for each batch.
29 |         self.ratio_list_batch = torch.Tensor(self.data_size).zero_()  # filled below; __getitem__ does not read it
30 |         num_batch = int(np.ceil(len(ratio_index) / batch_size))
31 |         for i in range(num_batch):
32 |             left_idx = i*batch_size
33 |             right_idx = min((i+1)*batch_size-1, self.data_size-1)  # inclusive index of the last sample in this batch, clipped to the data size
34 | 
35 |             if ratio_list[right_idx] < 1:  # NOTE(review): the three branches presumably rely on ratio_list being sorted ascending - confirm against the caller
36 |                 # for ratio < 1, we preserve the leftmost in each batch.
37 |                 target_ratio = ratio_list[left_idx]
38 |             elif ratio_list[left_idx] > 1:
39 |                 # for ratio > 1, we preserve the rightmost in each batch.
40 |                 target_ratio = ratio_list[right_idx]
41 |             else:
42 |                 # for ratio cross 1, we make it to be 1.
43 |                 target_ratio = 1
44 | 
45 |             self.ratio_list_batch[left_idx:(right_idx+1)] = target_ratio  # every sample in the batch gets the same target ratio
46 | 
47 | 
48 |     def __getitem__(self, index):  # returns (data as 3 x H x W tensor, im_info as 3-vector, gt_boxes tensor, num_boxes which is always 0)
49 |         index_ratio = index  # the index is used directly; ratio_index is not consulted here
50 | 
51 |         # get the anchor index for current sample index
52 |         # here we set the anchor index to the last one
53 |         # sample in this group
54 |         minibatch_db = [self._roidb[index_ratio]]  # single-entry minibatch
55 |         blobs = get_minibatch(minibatch_db)
56 |         data = torch.from_numpy(blobs['data'])
57 |         im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
58 |         # we need to random shuffle the bounding box.
59 |         data_height, data_width = data.size(1), data.size(2)
60 | 
61 |         data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)  # the view only works if the blob is (1, H, W, 3): batch of one, channels last
62 |         im_info = im_info.view(3)
63 | 
64 |         # gt_boxes = torch.FloatTensor([1,1,1,1,1])
65 |         gt_boxes = torch.from_numpy(blobs['gt_boxes'])  # returned as provided by get_minibatch; not padded to max_num_box here
66 |         num_boxes = 0  # constant; not derived from gt_boxes
67 | 
68 |         return data, im_info, gt_boxes, num_boxes
69 | 
70 |     def __len__(self):  # number of roidb entries (not len(ratio_list))
71 |         return len(self._roidb)


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,  // returns 1 after launching the forward kernel, 0 if rois does not have exactly 5 columns
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     // Grab raw pointers to the tensors (taken before any validation)
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 | 
17 |     // Number of ROIs
18 |     int num_rois = THCudaTensor_size(state, rois, 0);
19 |     int size_rois = THCudaTensor_size(state, rois, 1);
20 |     if (size_rois != 5)  // the only input check performed; no error is raised, the caller must inspect the return value
21 |     {
22 |         return 0;
23 |     }
24 | 
25 |     // batch size
26 |     // int batch_size = THCudaTensor_size(state, features, 0);
27 |     // if (batch_size != 1)
28 |     // {
29 |     //     return 0;
30 |     // }
31 |     // data height
32 |     int data_height = THCudaTensor_size(state, features, 2);
33 |     // data width
34 |     int data_width = THCudaTensor_size(state, features, 3);
35 |     // Number of channels
36 |     int num_channels = THCudaTensor_size(state, features, 1);
37 | 
38 |     cudaStream_t stream = THCState_getCurrentStream(state);
39 | 
40 |     ROIPoolForwardLaucher(  // the launcher's own return value is ignored; the batch size is not passed to the forward launcher
41 |         data_flat, spatial_scale, num_rois, data_height,
42 |         data_width, num_channels, pooled_height,
43 |         pooled_width, rois_flat,
44 |         output_flat, argmax_flat, stream);
45 | 
46 |     return 1;
47 | }
48 | 
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,  // returns 1 after launching the backward kernel, 0 if rois does not have exactly 5 columns
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     // Grab raw pointers to the tensors (taken before any validation)
53 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 |     float * rois_flat = THCudaTensor_data(state, rois);
55 | 
56 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 | 
59 |     // Number of ROIs
60 |     int num_rois = THCudaTensor_size(state, rois, 0);
61 |     int size_rois = THCudaTensor_size(state, rois, 1);
62 |     if (size_rois != 5)  // the only input check performed; no error is raised, the caller must inspect the return value
63 |     {
64 |         return 0;
65 |     }
66 | 
67 |     // batch size (all dimensions below are read from bottom_grad, the gradient buffer being filled)
68 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 |     // if (batch_size != 1)
70 |     // {
71 |     //     return 0;
72 |     // }
73 |     // data height
74 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 |     // data width
76 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 |     // Number of channels
78 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 | 
80 |     cudaStream_t stream = THCState_getCurrentStream(state);
81 |     ROIPoolBackwardLaucher(  // the launcher's own return value is ignored
82 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 |         data_width, num_channels, pooled_height,
84 |         pooled_width, rois_flat,
85 |         bottom_grad_flat, argmax_flat, stream);
86 | 
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/lib/datasets/debug.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 3,
 6 |    "metadata": {},
 7 |    "outputs": [
 8 |     {
 9 |      "ename": "ModuleNotFoundError",
10 |      "evalue": "No module named '__main__.imdb'; '__main__' is not a package",
11 |      "output_type": "error",
12 |      "traceback": [
13 |       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
14 |       "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
15 |       "\u001b[0;32m<ipython-input-3-d6e7ee16cd02>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     12\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mxml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0metree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mElementTree\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mET\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mimdb\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mimdb\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     15\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mimdb\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mROOT_DIR\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0;34m.\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mds_utils\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
16 |       "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named '__main__.imdb'; '__main__' is not a package"
17 |      ]
18 |     }
19 |    ],
20 |    "source": [
21 |     "import xml.dom.minidom as minidom\n",
22 |     "\n",
23 |     "import os\n",
24 |     "# import PIL\n",
25 |     "import numpy as np\n",
26 |     "import scipy.sparse\n",
27 |     "import subprocess\n",
28 |     "import math\n",
29 |     "import glob\n",
30 |     "import uuid\n",
31 |     "import scipy.io as sio\n",
32 |     "import xml.etree.ElementTree as ET\n",
33 |     "import pickle\n",
34 |     "from .imdb import imdb\n",
35 |     "from .imdb import ROOT_DIR\n",
36 |     "from . import ds_utils\n",
37 |     "from .voc_eval import voc_eval\n",
38 |     "\n",
39 |     "# TODO: make fast_rcnn irrelevant\n",
40 |     "# >>>> obsolete, because it depends on sth outside of this project\n",
41 |     "from model.utils.config import cfg"
42 |    ]
43 |   },
44 |   {
45 |    "cell_type": "code",
46 |    "execution_count": null,
47 |    "metadata": {},
48 |    "outputs": [],
49 |    "source": []
50 |   }
51 |  ],
52 |  "metadata": {
53 |   "kernelspec": {
54 |    "display_name": "pytorch-rcnn",
55 |    "language": "python",
56 |    "name": "pytorch-rcnn"
57 |   },
58 |   "language_info": {
59 |    "codemirror_mode": {
60 |     "name": "ipython",
61 |     "version": 3
62 |    },
63 |    "file_extension": ".py",
64 |    "mimetype": "text/x-python",
65 |    "name": "python",
66 |    "nbconvert_exporter": "python",
67 |    "pygments_lexer": "ipython3",
68 |    "version": "3.7.3"
69 |   }
70 |  },
71 |  "nbformat": 4,
72 |  "nbformat_minor": 2
73 | }
74 | 


--------------------------------------------------------------------------------
/env.yml:
--------------------------------------------------------------------------------
  1 | name: DAnA
  2 | channels:
  3 |   - pytorch
  4 |   - conda-forge
  5 |   - defaults
  6 | dependencies:
  7 |   - _libgcc_mutex=0.1=main
  8 |   - _pytorch_select=0.1=cpu_0
  9 |   - absl-py=0.9.0=py36_0
 10 |   - blas=1.0=mkl
 11 |   - blinker=1.4=py_1
 12 |   - brotlipy=0.7.0=py36h8c4c3a4_1000
 13 |   - c-ares=1.15.0=h516909a_1001
 14 |   - ca-certificates=2020.6.24=0
 15 |   - cachetools=4.1.0=py_1
 16 |   - certifi=2020.6.20=py36_0
 17 |   - cffi=1.14.0=py36h2e261b9_0
 18 |   - chardet=3.0.4=py36h9f0ad1d_1006
 19 |   - click=7.1.2=pyh9f0ad1d_0
 20 |   - cryptography=2.9.2=py36h45558ae_0
 21 |   - cudatoolkit=10.0.130=0
 22 |   - freetype=2.9.1=h8a8886c_1
 23 |   - google-auth=1.14.3=pyh9f0ad1d_0
 24 |   - google-auth-oauthlib=0.4.1=py_2
 25 |   - grpcio=1.27.2=py36hf8bcb03_0
 26 |   - idna=2.9=py_1
 27 |   - importlib-metadata=1.6.0=py36h9f0ad1d_0
 28 |   - intel-openmp=2020.0=166
 29 |   - jpeg=9b=h024ee3a_2
 30 |   - ld_impl_linux-64=2.33.1=h53a641e_7
 31 |   - libedit=3.1.20181209=hc058e9b_0
 32 |   - libffi=3.2.1=hd88cf55_4
 33 |   - libgcc-ng=9.1.0=hdf63c60_0
 34 |   - libgfortran-ng=7.3.0=hdf63c60_0
 35 |   - libpng=1.6.37=hbc83047_0
 36 |   - libprotobuf=3.11.4=h8b12597_0
 37 |   - libstdcxx-ng=9.1.0=hdf63c60_0
 38 |   - libtiff=4.1.0=h2733197_0
 39 |   - markdown=3.2.2=py_0
 40 |   - mkl=2020.0=166
 41 |   - mkl-service=2.3.0=py36he904b0f_0
 42 |   - mkl_fft=1.0.15=py36ha843d7b_0
 43 |   - mkl_random=1.1.0=py36hd6b4f25_0
 44 |   - ncurses=6.2=he6710b0_0
 45 |   - ninja=1.9.0=py36hfd86e86_0
 46 |   - numpy=1.18.1=py36h4f9e942_0
 47 |   - numpy-base=1.18.1=py36hde5b4d6_1
 48 |   - oauthlib=3.0.1=py_0
 49 |   - olefile=0.46=py36_0
 50 |   - openssl=1.1.1g=h516909a_0
 51 |   - pillow=6.1.0=py36h34e0f95_0
 52 |   - pip=20.0.2=py36_1
 53 |   - pyasn1=0.4.8=py_0
 54 |   - pyasn1-modules=0.2.7=py_0
 55 |   - pycparser=2.20=py_0
 56 |   - pyjwt=1.7.1=py_0
 57 |   - pyopenssl=19.1.0=py_1
 58 |   - pysocks=1.7.1=py36h9f0ad1d_1
 59 |   - python=3.6.10=hcf32534_1
 60 |   - python_abi=3.6=1_cp36m
 61 |   - pytorch=1.2.0=py3.6_cuda10.0.130_cudnn7.6.2_0
 62 |   - readline=8.0=h7b6447c_0
 63 |   - requests=2.23.0=pyh8c360ce_2
 64 |   - requests-oauthlib=1.2.0=py_0
 65 |   - rsa=4.0=py_0
 66 |   - setuptools=46.1.3=py36_0
 67 |   - six=1.14.0=py36_0
 68 |   - sqlite=3.31.1=h7b6447c_0
 69 |   - tensorboard=2.1.1=py_1
 70 |   - tk=8.6.8=hbc83047_0
 71 |   - torchvision=0.4.0=py36_cu100
 72 |   - urllib3=1.25.9=py_0
 73 |   - werkzeug=1.0.1=pyh9f0ad1d_0
 74 |   - wheel=0.34.2=py36_0
 75 |   - xz=5.2.4=h14c3975_4
 76 |   - zipp=3.1.0=py_0
 77 |   - zlib=1.2.11=h7b6447c_3
 78 |   - zstd=1.3.7=h0b5b093_0
 79 |   - pip:
 80 |     - cycler==0.10.0
 81 |     - cython==0.29.16
 82 |     - easydict==1.9
 83 |     - future==0.18.2
 84 |     - kiwisolver==1.2.0
 85 |     - matplotlib==3.2.1
 86 |     - msgpack==1.0.0
 87 |     - opencv-python==4.2.0.34
 88 |     - packaging==20.3
 89 |     - pandas==1.0.3
 90 |     - protobuf==3.11.3
 91 |     - pyparsing==2.4.7
 92 |     - python-box==4.2.3
 93 |     - python-dateutil==2.8.1
 94 |     - pytz==2020.1
 95 |     - pyyaml==5.3.1
 96 |     - ruamel-yaml==0.16.10
 97 |     - ruamel-yaml-clib==0.2.0
 98 |     - scipy==1.1.0
 99 |     - tensorboardx==2.0
100 |     - toml==0.10.1
101 |     - torchsummary==1.5.1
102 |     - tqdm==4.48.0
103 | prefix: /home/tony/anaconda3/envs/rcnn36
104 | 
105 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/minibatch.py:
--------------------------------------------------------------------------------
 1 | # --------------------------------------------------------
 2 | # Fast R-CNN
 3 | # Copyright (c) 2015 Microsoft
 4 | # Licensed under The MIT License [see LICENSE for details]
 5 | # Written by Ross Girshick and Xinlei Chen
 6 | # --------------------------------------------------------
 7 | 
 8 | """Compute minibatch blobs for training a Fast R-CNN network."""
 9 | from __future__ import absolute_import
10 | from __future__ import division
11 | from __future__ import print_function
12 | 
13 | import numpy as np
14 | import numpy.random as npr
15 | from scipy.misc import imread
16 | from model.utils.config import cfg
17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
18 | import pdb
19 | 
20 | 
21 | # def get_minibatch(roidb, num_classes):
def get_minibatch(roidb):
  """Assemble the blobs for one training sample from a roidb slice.

  Args:
    roidb: sequence of roidb entries; exactly one entry is accepted
      (enforced by the assertions below).

  Returns:
    dict with the keys 'data' (image blob), 'gt_boxes' (rows of
    x1, y1, x2, y2, cls with coordinates multiplied by the image scale),
    'im_info' (blob dim 1, blob dim 2, scale) and 'img_id'.
  """
  batch_len = len(roidb)

  # One randomly chosen index into cfg.TRAIN.SCALES per image.
  scale_choices = npr.randint(0, high=len(cfg.TRAIN.SCALES), size=batch_len)
  assert(cfg.TRAIN.BATCH_SIZE % batch_len == 0), \
    'num_images ({}) must divide BATCH_SIZE ({})'. \
    format(batch_len, cfg.TRAIN.BATCH_SIZE)

  # Load and rescale the image(s); also yields the scale factor applied.
  image_blob, scales = _get_image_blob(roidb, scale_choices)

  assert len(scales) == 1, "Single batch only"
  assert len(roidb) == 1, "Single batch only"

  entry = roidb[0]
  scale = scales[0]

  # Rows whose class label is non-zero are kept as ground truth.
  is_labelled = entry['gt_classes'] != 0
  if cfg.TRAIN.USE_ALL_GT:
    keep = np.where(is_labelled)[0]
  else:
    # Additionally drop rows having any overlap entry <= -1.0
    # (the original comment ties this to COCO ''iscrowd'' boxes).
    above_marker = np.all(entry['gt_overlaps'].toarray() > -1.0, axis=1)
    keep = np.where(is_labelled & above_marker)[0]

  boxes = np.empty((len(keep), 5), dtype=np.float32)
  boxes[:, 0:4] = entry['boxes'][keep, :] * scale
  boxes[:, 4] = entry['gt_classes'][keep]

  im_info = np.array(
    [[image_blob.shape[1], image_blob.shape[2], scale]],
    dtype=np.float32)

  return {
    'data': image_blob,
    'gt_boxes': boxes,
    'im_info': im_info,
    'img_id': entry['img_id'],
  }
63 | 
def _get_image_blob(roidb, scale_inds):
  """Read, preprocess and stack the images listed in ``roidb``.

  Args:
    roidb: sequence of entries providing 'image' (path) and 'flipped'.
    scale_inds: per-image index into cfg.TRAIN.SCALES.

  Returns:
    (blob, im_scales): the result of im_list_to_blob over the processed
    images, and the list of scale factors reported by prep_im_for_blob.
  """
  scaled_images = []
  scale_factors = []
  for idx, entry in enumerate(roidb):
    img = imread(entry['image'])

    # Two-dimensional (single channel) input: replicate it into 3 channels.
    if img.ndim == 2:
      img = np.concatenate([img[:, :, np.newaxis]] * 3, axis=2)

    # Reverse the channel axis (rgb -> bgr per the original note, since the
    # upstream code loaded images with cv2).
    img = img[:, :, ::-1]

    # Mirror along the width axis when the entry is marked as flipped.
    if entry['flipped']:
      img = img[:, ::-1, :]

    size = cfg.TRAIN.SCALES[scale_inds[idx]]
    img, factor = prep_im_for_blob(img, cfg.PIXEL_MEANS, size,
                                   cfg.TRAIN.MAX_SIZE)
    scale_factors.append(factor)
    scaled_images.append(img)

  # Combine the per-image arrays into a single batch blob.
  return im_list_to_blob(scaled_images), scale_factors
97 | 


--------------------------------------------------------------------------------
/lib/model/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | # --------------------------------------------------------
  3 | # Faster R-CNN
  4 | # Copyright (c) 2015 Microsoft
  5 | # Licensed under The MIT License [see LICENSE for details]
  6 | # Written by Ross Girshick and Sean Bell
  7 | # --------------------------------------------------------
  8 | 
  9 | import numpy as np
 10 | import pdb
 11 | 
 12 | # Verify that we compute the same anchors as Shaoqing's matlab implementation:
 13 | #
 14 | #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
 15 | #    >> anchors
 16 | #
 17 | #    anchors =
 18 | #
 19 | #       -83   -39   100    56
 20 | #      -175   -87   192   104
 21 | #      -359  -183   376   200
 22 | #       -55   -55    72    72
 23 | #      -119  -119   136   136
 24 | #      -247  -247   264   264
 25 | #       -35   -79    52    96
 26 | #       -79  -167    96   184
 27 | #      -167  -343   184   360
 28 | 
 29 | #array([[ -83.,  -39.,  100.,   56.],
 30 | #       [-175.,  -87.,  192.,  104.],
 31 | #       [-359., -183.,  376.,  200.],
 32 | #       [ -55.,  -55.,   72.,   72.],
 33 | #       [-119., -119.,  136.,  136.],
 34 | #       [-247., -247.,  264.,  264.],
 35 | #       [ -35.,  -79.,   52.,   96.],
 36 | #       [ -79., -167.,   96.,  184.],
 37 | #       [-167., -343.,  184.,  360.]])
 38 | 
# Compatibility shim: make the name ``xrange`` resolvable on interpreters
# where it is not a builtin by aliasing it to ``range``.
try:
    xrange          # Python 2
except NameError:
    xrange = range  # Python 3
 43 | 
 44 | 
 45 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
 46 |                      scales=2**np.arange(3, 6)):
 47 |     """
 48 |     Generate anchor (reference) windows by enumerating aspect ratios X
 49 |     scales wrt a reference (0, 0, 15, 15) window.
 50 |     """
 51 | 
 52 |     base_anchor = np.array([1, 1, base_size, base_size]) - 1
 53 |     ratio_anchors = _ratio_enum(base_anchor, ratios)
 54 |     anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
 55 |                          for i in xrange(ratio_anchors.shape[0])])
 56 |     return anchors
 57 | 
 58 | def _whctrs(anchor):
 59 |     """
 60 |     Return width, height, x center, and y center for an anchor (window).
 61 |     """
 62 | 
 63 |     w = anchor[2] - anchor[0] + 1
 64 |     h = anchor[3] - anchor[1] + 1
 65 |     x_ctr = anchor[0] + 0.5 * (w - 1)
 66 |     y_ctr = anchor[1] + 0.5 * (h - 1)
 67 |     return w, h, x_ctr, y_ctr
 68 | 
 69 | def _mkanchors(ws, hs, x_ctr, y_ctr):
 70 |     """
 71 |     Given a vector of widths (ws) and heights (hs) around a center
 72 |     (x_ctr, y_ctr), output a set of anchors (windows).
 73 |     """
 74 | 
 75 |     ws = ws[:, np.newaxis]
 76 |     hs = hs[:, np.newaxis]
 77 |     anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
 78 |                          y_ctr - 0.5 * (hs - 1),
 79 |                          x_ctr + 0.5 * (ws - 1),
 80 |                          y_ctr + 0.5 * (hs - 1)))
 81 |     return anchors
 82 | 
 83 | def _ratio_enum(anchor, ratios):
 84 |     """
 85 |     Enumerate a set of anchors for each aspect ratio wrt an anchor.
 86 |     """
 87 | 
 88 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
 89 |     size = w * h
 90 |     size_ratios = size / ratios
 91 |     ws = np.round(np.sqrt(size_ratios))
 92 |     hs = np.round(ws * ratios)
 93 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
 94 |     return anchors
 95 | 
 96 | def _scale_enum(anchor, scales):
 97 |     """
 98 |     Enumerate a set of anchors for each scale wrt an anchor.
 99 |     """
100 | 
101 |     w, h, x_ctr, y_ctr = _whctrs(anchor)
102 |     ws = w * scales
103 |     hs = h * scales
104 |     anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
105 |     return anchors
106 | 
if __name__ == '__main__':
    # Ad-hoc check when executed as a script: time a single
    # generate_anchors() call, print the elapsed seconds and the anchors,
    # then open an interactive IPython shell.
    import time
    t = time.time()
    a = generate_anchors()
    print(time.time() - t)
    print(a)
    from IPython import embed; embed()
114 | 


--------------------------------------------------------------------------------
/lib/model/utils/bbox.pyx:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast R-CNN
  3 | # Copyright (c) 2015 Microsoft
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Sergey Karayev
  6 | # --------------------------------------------------------
  7 | 
  8 | cimport cython
  9 | import numpy as np
 10 | cimport numpy as np
 11 | 
 12 | DTYPE = np.float
 13 | ctypedef np.float_t DTYPE_t
 14 | 
def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    # Python-callable entry point: forwards both 2-D arrays to the cdef
    # implementation bbox_overlaps_c and returns its result unchanged.
    return bbox_overlaps_c(boxes, query_boxes)
 18 | 
cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute intersection-over-union between every box and every query box.
    Extents are measured as (max - min + 1) along each axis.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes;
        entries stay 0 where the pair does not intersect
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    for k in range(K):
        # Area of the k-th query box, computed once per column.
        box_area = (
            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
            (query_boxes[k, 3] - query_boxes[k, 1] + 1)
        )
        for n in range(N):
            # Intersection extent along columns 0/2.
            iw = (
                min(boxes[n, 2], query_boxes[k, 2]) -
                max(boxes[n, 0], query_boxes[k, 0]) + 1
            )
            if iw > 0:
                # Intersection extent along columns 1/3.
                ih = (
                    min(boxes[n, 3], query_boxes[k, 3]) -
                    max(boxes[n, 1], query_boxes[k, 1]) + 1
                )
                if ih > 0:
                    # Union area = area(box) + area(query) - intersection.
                    ua = float(
                        (boxes[n, 2] - boxes[n, 0] + 1) *
                        (boxes[n, 3] - boxes[n, 1] + 1) +
                        box_area - iw * ih
                    )
                    overlaps[n, k] = iw * ih / ua
    return overlaps
 60 | 
 61 | 
def bbox_intersections(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    # Python-callable entry point: forwards both 2-D arrays to the cdef
    # implementation bbox_intersections_c and returns its result unchanged.
    return bbox_intersections_c(boxes, query_boxes)
 66 | 
 67 | 
 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c(
 69 |         np.ndarray[DTYPE_t, ndim=2] boxes,
 70 |         np.ndarray[DTYPE_t, ndim=2] query_boxes):
 71 |     """
 72 |     For each query box compute the intersection ratio covered by boxes
 73 |     ----------
 74 |     Parameters
 75 |     ----------
 76 |     boxes: (N, 4) ndarray of float
 77 |     query_boxes: (K, 4) ndarray of float
 78 |     Returns
 79 |     -------
 80 |     overlaps: (N, K) ndarray of intersec between boxes and query_boxes
 81 |     """
 82 |     cdef unsigned int N = boxes.shape[0]
 83 |     cdef unsigned int K = query_boxes.shape[0]
 84 |     cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
 85 |     cdef DTYPE_t iw, ih, box_area
 86 |     cdef DTYPE_t ua
 87 |     cdef unsigned int k, n
 88 |     for k in range(K):
 89 |         box_area = (
 90 |             (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
 91 |             (query_boxes[k, 3] - query_boxes[k, 1] + 1)
 92 |         )
 93 |         for n in range(N):
 94 |             iw = (
 95 |                 min(boxes[n, 2], query_boxes[k, 2]) -
 96 |                 max(boxes[n, 0], query_boxes[k, 0]) + 1
 97 |             )
 98 |             if iw > 0:
 99 |                 ih = (
100 |                     min(boxes[n, 3], query_boxes[k, 3]) -
101 |                     max(boxes[n, 1], query_boxes[k, 1]) + 1
102 |                 )
103 |                 if ih > 0:
104 |                     intersec[n, k] = iw * ih / box_area
105 |     return intersec


--------------------------------------------------------------------------------
/lib/datasets/factory.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | from __future__ import division
  3 | from __future__ import print_function
  4 | import numpy as np
  5 | __sets = {}
  6 | from datasets.pascal_voc import pascal_voc
  7 | from datasets.coco import coco
  8 | from datasets.coco_split import coco_split
  9 | from datasets.imagenet import imagenet
 10 | from datasets.vg import vg
 11 | from datasets.episode import episode
 12 | from datasets.ycb2d import ycb2d
 13 | 
# Populate the registry: every entry maps a dataset name to a zero-argument
# factory. ``split`` and ``year`` are bound as lambda default arguments so each
# factory keeps the values of the loop iteration that created it.

# ycb2d 'replace' variants, one per listed number
for i in [256, 240, 224, 208, 200, 192, 160, 128, 100, 96, 80, 64, 50, 48, 32, 30, 20, 16, 10]:
  name = f'ycb2d_replace{i}'
  __sets[name] = (lambda split='replace', year=str(i): ycb2d(split, year))

# ycb2d inference variants
name = 'ycb2d_inference_sparse'
__sets[name] = (lambda split='inference', year='sparse': ycb2d(split, year))
name = 'ycb2d_inferencefs_sparse'
__sets[name] = (lambda split='inferencefs', year='sparse': ycb2d(split, year))
name = 'ycb2d_inference_dense'
__sets[name] = (lambda split='inference', year='dense': ycb2d(split, year))
name = 'ycb2d_inferencefs_dense'
__sets[name] = (lambda split='inferencefs', year='dense': ycb2d(split, year))
name = 'ycb2d_inference'
__sets[name] = (lambda split='inference', year='1234': ycb2d(split, year))

# ycb2d 'stage', 'oracle', 'oracledense' and 'fsoracledense' variants
for i in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, '1cls', '2cls', '3cls', '4cls']:
  name = f'ycb2d_stage{i}'
  __sets[name] = (lambda split='stage', year=str(i): ycb2d(split, year))
for i in [512, 256, 128, 64, 32, 16, 8]:
  name = f'ycb2d_oracle{i}'
  __sets[name] = (lambda split='oracle', year=str(i): ycb2d(split, year))
for i in [64, 32, 16]:
  name = f'ycb2d_oracle_dense{i}'
  __sets[name] = (lambda split='oracledense', year=str(i): ycb2d(split, year))
for i in [20, 10, 5]:
  name = f'ycb2d_fsoracle_dense{i}'
  __sets[name] = (lambda split='fsoracledense', year=str(i): ycb2d(split, year))

# ycb2d_pseudo1 ... ycb2d_pseudo9
name = 'ycb2d_pseudo'
for i in range(1, 10):
  __sets[name+str(i)] = (lambda split='pseudo', year=str(i): ycb2d(split, year))

# coco_ft: coco_split with split 'shot' and year '10'
__sets['coco_ft'] = (lambda split='shot', year='10': coco_split(split, year))

# coco 3way / 5way splits
for year in ['set1', 'set2']:
  for split in ['3way', '5way']:
    name = 'coco_{}_{}'.format(split, year)
    __sets[name] = (lambda split=split, year=year: coco_split(split, year))

# vis
for year in ['set1', 'set2', 'set3', 'set4']:
  for split in ['vis']:
    name = 'coco_{}_{}'.format(split, year)
    __sets[name] = (lambda split=split, year=year: coco_split(split, year))

# coco 20 evaluation
for year in ['set1', 'set2', 'set3', 'set4']:
  for split in ['20']:
    name = 'coco_{}_{}'.format(split, year)
    __sets[name] = (lambda split=split, year=year: coco_split(split, year))

# coco 60 training
for year in ['set1', 'set2', 'set3', 'set4', 'set1allcat']:
  for split in ['60']:
    name = 'coco_{}_{}'.format(split, year)
    __sets[name] = (lambda split=split, year=year: coco_split(split, year))

# episode: names are coco_<year>_ep<n> for n in 0..599
for year in ['novel', 'base', 'val']:
  for n in range(600): 
    split = 'ep' + str(n)
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: episode(split, year))


# Set up voc_<year>_<split>
for year in ['2007', '2012']:
  for split in ['train', 'val', 'trainval', 'test']:
    name = 'voc_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))

# Set up coco_2014_<split>
for year in ['2014']:
  for split in ['train', 'val', 'minival', 'valminusminival', 'trainval']:
    name = 'coco_{}_{}'.format(year, split)
    __sets[name] = (lambda split=split, year=year: coco(split, year))
 91 | 
 92 | 
 93 | def get_imdb(name):
 94 |   """Get an imdb (image database) by name."""
 95 |   if name not in __sets:
 96 |     raise KeyError('Unknown dataset: {}'.format(name))
 97 |   return __sets[name]()
 98 | 
 99 | 
def list_imdbs():
  """Return the names of all registered image databases as a list."""
  return list(__sets)
103 | 


--------------------------------------------------------------------------------
/lib/model/roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | 
  4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
  5 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
  6 | {
  7 |     // Grab the input tensor
  8 |     float * data_flat = THFloatTensor_data(features);
  9 |     float * rois_flat = THFloatTensor_data(rois);
 10 | 
 11 |     float * output_flat = THFloatTensor_data(output);
 12 | 
 13 |     // Number of ROIs
 14 |     int num_rois = THFloatTensor_size(rois, 0);
 15 |     int size_rois = THFloatTensor_size(rois, 1);
 16 |     // batch size
 17 |     int batch_size = THFloatTensor_size(features, 0);
 18 |     if(batch_size != 1)
 19 |     {
 20 |         return 0;
 21 |     }
 22 |     // data height
 23 |     int data_height = THFloatTensor_size(features, 1);
 24 |     // data width
 25 |     int data_width = THFloatTensor_size(features, 2);
 26 |     // Number of channels
 27 |     int num_channels = THFloatTensor_size(features, 3);
 28 | 
 29 |     // Set all element of the output tensor to -inf.
 30 |     THFloatStorage_fill(THFloatTensor_storage(output), -1);
 31 | 
 32 |     // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
 33 |     int index_roi = 0;
 34 |     int index_output = 0;
 35 |     int n;
 36 |     for (n = 0; n < num_rois; ++n)
 37 |     {
 38 |         int roi_batch_ind = rois_flat[index_roi + 0];
 39 |         int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
 40 |         int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
 41 |         int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
 42 |         int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
 43 |         //      CHECK_GE(roi_batch_ind, 0);
 44 |         //      CHECK_LT(roi_batch_ind, batch_size);
 45 | 
 46 |         int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
 47 |         int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
 48 |         float bin_size_h = (float)(roi_height) / (float)(pooled_height);
 49 |         float bin_size_w = (float)(roi_width) / (float)(pooled_width);
 50 | 
 51 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 52 |         const int output_area = pooled_width * pooled_height;
 53 | 
 54 |         int c, ph, pw;
 55 |         for (ph = 0; ph < pooled_height; ++ph)
 56 |         {
 57 |             for (pw = 0; pw < pooled_width; ++pw)
 58 |             {
 59 |                 int hstart = (floor((float)(ph) * bin_size_h));
 60 |                 int wstart = (floor((float)(pw) * bin_size_w));
 61 |                 int hend = (ceil((float)(ph + 1) * bin_size_h));
 62 |                 int wend = (ceil((float)(pw + 1) * bin_size_w));
 63 | 
 64 |                 hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
 65 |                 hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
 66 |                 wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
 67 |                 wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
 68 | 
 69 |                 const int pool_index = index_output + (ph * pooled_width + pw);
 70 |                 int is_empty = (hend <= hstart) || (wend <= wstart);
 71 |                 if (is_empty)
 72 |                 {
 73 |                     for (c = 0; c < num_channels * output_area; c += output_area)
 74 |                     {
 75 |                         output_flat[pool_index + c] = 0;
 76 |                     }
 77 |                 }
 78 |                 else
 79 |                 {
 80 |                     int h, w, c;
 81 |                     for (h = hstart; h < hend; ++h)
 82 |                     {
 83 |                         for (w = wstart; w < wend; ++w)
 84 |                         {
 85 |                             for (c = 0; c < num_channels; ++c)
 86 |                             {
 87 |                                 const int index = (h * data_width + w) * num_channels + c;
 88 |                                 if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
 89 |                                 {
 90 |                                     output_flat[pool_index + c * output_area] = data_flat[index_data + index];
 91 |                                 }
 92 |                             }
 93 |                         }
 94 |                     }
 95 |                 }
 96 |             }
 97 |         }
 98 | 
 99 |         // Increment ROI index
100 |         index_roi += size_rois;
101 |         index_output += pooled_height * pooled_width * num_channels;
102 |     }
103 |     return 1;
104 | }


--------------------------------------------------------------------------------
/lib/datasets/vg_eval.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | # --------------------------------------------------------
  3 | # Fast/er R-CNN
  4 | # Licensed under The MIT License [see LICENSE for details]
  5 | # Written by Bharath Hariharan
  6 | # --------------------------------------------------------
  7 | 
  8 | import xml.etree.ElementTree as ET
  9 | import os
 10 | import numpy as np
 11 | from .voc_eval import voc_ap
 12 | 
 13 | def vg_eval( detpath,
 14 |              gt_roidb,
 15 |              image_index,
 16 |              classindex,
 17 |              ovthresh=0.5,
 18 |              use_07_metric=False,
 19 |              eval_attributes=False):
 20 |     """rec, prec, ap, sorted_scores, npos = voc_eval(
 21 |                                 detpath, 
 22 |                                 gt_roidb,
 23 |                                 image_index,
 24 |                                 classindex,
 25 |                                 [ovthresh],
 26 |                                 [use_07_metric])
 27 | 
 28 |     Top level function that does the Visual Genome evaluation.
 29 | 
 30 |     detpath: Path to detections
 31 |     gt_roidb: List of ground truth structs.
 32 |     image_index: List of image ids.
 33 |     classindex: Category index
 34 |     [ovthresh]: Overlap threshold (default = 0.5)
 35 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 36 |         (default False)
 37 |     """
 38 |     # extract gt objects for this class
 39 |     class_recs = {}
 40 |     npos = 0
 41 |     for item,imagename in zip(gt_roidb,image_index):
 42 |         if eval_attributes:
 43 |             bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :]
 44 |         else:
 45 |             bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :]
 46 |         difficult = np.zeros((bbox.shape[0],)).astype(np.bool)
 47 |         det = [False] * bbox.shape[0]
 48 |         npos = npos + sum(~difficult)        
 49 |         class_recs[str(imagename)] = {'bbox': bbox,
 50 |                                  'difficult': difficult,
 51 |                                  'det': det}
 52 |     if npos == 0:
 53 |         # No ground truth examples
 54 |         return 0,0,0,0,npos
 55 | 
 56 |     # read dets
 57 |     with open(detpath, 'r') as f:
 58 |         lines = f.readlines()
 59 |     if len(lines) == 0:
 60 |         # No detection examples
 61 |         return 0,0,0,0,npos
 62 | 
 63 |     splitlines = [x.strip().split(' ') for x in lines]
 64 |     image_ids = [x[0] for x in splitlines]
 65 |     confidence = np.array([float(x[1]) for x in splitlines])
 66 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
 67 | 
 68 |     # sort by confidence
 69 |     sorted_ind = np.argsort(-confidence)
 70 |     sorted_scores = -np.sort(-confidence)
 71 |     BB = BB[sorted_ind, :]
 72 |     image_ids = [image_ids[x] for x in sorted_ind]
 73 | 
 74 |     # go down dets and mark TPs and FPs
 75 |     nd = len(image_ids)
 76 |     tp = np.zeros(nd)
 77 |     fp = np.zeros(nd)
 78 |     for d in range(nd):
 79 |         R = class_recs[image_ids[d]]
 80 |         bb = BB[d, :].astype(float)
 81 |         ovmax = -np.inf
 82 |         BBGT = R['bbox'].astype(float)
 83 | 
 84 |         if BBGT.size > 0:
 85 |             # compute overlaps
 86 |             # intersection
 87 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
 88 |             iymin = np.maximum(BBGT[:, 1], bb[1])
 89 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
 90 |             iymax = np.minimum(BBGT[:, 3], bb[3])
 91 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
 92 |             ih = np.maximum(iymax - iymin + 1., 0.)
 93 |             inters = iw * ih
 94 | 
 95 |             # union
 96 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
 97 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
 98 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
 99 | 
100 |             overlaps = inters / uni
101 |             ovmax = np.max(overlaps)
102 |             jmax = np.argmax(overlaps)
103 | 
104 |         if ovmax > ovthresh:
105 |             if not R['difficult'][jmax]:
106 |                 if not R['det'][jmax]:
107 |                     tp[d] = 1.
108 |                     R['det'][jmax] = 1
109 |                 else:
110 |                     fp[d] = 1.
111 |         else:
112 |             fp[d] = 1.
113 | 
114 |     # compute precision recall
115 |     fp = np.cumsum(fp)
116 |     tp = np.cumsum(tp)
117 |     rec = tp / float(npos)
118 |     # avoid divide by zero in case the first detection matches a difficult
119 |     # ground truth
120 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
121 |     ap = voc_ap(rec, prec, use_07_metric)
122 |     
123 |     return rec, prec, ap, sorted_scores, npos
124 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/roidb.py:
--------------------------------------------------------------------------------
  1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata."""
  2 | from __future__ import absolute_import
  3 | from __future__ import division
  4 | from __future__ import print_function
  5 | 
  6 | import datasets
  7 | import numpy as np
  8 | from model.utils.config import cfg
  9 | from datasets.factory import get_imdb
 10 | import PIL
 11 | import pdb
 12 | 
 13 | def prepare_roidb(imdb):
 14 |   """Enrich the imdb's roidb by adding some derived quantities that
 15 |   are useful for training. This function precomputes the maximum
 16 |   overlap, taken over ground-truth boxes, between each ROI and
 17 |   each ground-truth box. The class with maximum overlap is also
 18 |   recorded.
 19 |   """
 20 | 
 21 |   roidb = imdb.roidb
 22 |   if not (imdb.name.startswith('coco')):
 23 |     sizes = [PIL.Image.open(imdb.image_path_at(i)).size
 24 |          for i in range(imdb.num_images)]
 25 |   for i in range(len(imdb.image_index)):
 26 |     roidb[i]['img_id'] = imdb.image_id_at(i)
 27 |     roidb[i]['image'] = imdb.image_path_at(i)
 28 |     if not (imdb.name.startswith('coco')):
 29 |       roidb[i]['width'] = sizes[i][0]
 30 |       roidb[i]['height'] = sizes[i][1]
 31 |     # need gt_overlaps as a dense array for argmax
 32 |     # (num_obj, num_class)
 33 |     gt_overlaps = roidb[i]['gt_overlaps'].toarray()
 34 |     # max overlap with gt over classes (columns)
 35 |     max_overlaps = gt_overlaps.max(axis=1)
 36 |     # gt class that had the max overlap
 37 |     max_classes = gt_overlaps.argmax(axis=1)
 38 |     roidb[i]['max_classes'] = max_classes
 39 |     roidb[i]['max_overlaps'] = max_overlaps
 40 |     # sanity checks
 41 |     # max overlap of 0 => class should be zero (background)
 42 |     zero_inds = np.where(max_overlaps == 0)[0]
 43 |     assert all(max_classes[zero_inds] == 0)
 44 |     # max overlap > 0 => class should not be zero (must be a fg class)
 45 |     nonzero_inds = np.where(max_overlaps > 0)[0]
 46 |     assert all(max_classes[nonzero_inds] != 0)
 47 | 
 48 | 
 49 | def rank_roidb_ratio(roidb):
 50 |     # rank roidb based on the ratio between width and height.
 51 |     ratio_large = 2 # largest ratio to preserve.
 52 |     ratio_small = 0.5 # smallest ratio to preserve.    
 53 |     
 54 |     ratio_list = []
 55 |     for i in range(len(roidb)):
 56 |       width = roidb[i]['width']
 57 |       height = roidb[i]['height']
 58 |       ratio = width / float(height)
 59 | 
 60 |       # trim the ratio into 0.5 ~ 2.
 61 |       # remark need_crop if the ratio over that range
 62 |       if ratio > ratio_large:
 63 |         roidb[i]['need_crop'] = 1
 64 |         ratio = ratio_large
 65 |       elif ratio < ratio_small:
 66 |         roidb[i]['need_crop'] = 1
 67 |         ratio = ratio_small        
 68 |       else:
 69 |         roidb[i]['need_crop'] = 0
 70 | 
 71 |       ratio_list.append(ratio)
 72 | 
 73 |     ratio_list = np.array(ratio_list)
 74 |     ratio_index = np.argsort(ratio_list)
 75 |     # return sorted ratio list, index
 76 |     # ex. [0.5, 0.5, 1., 1.6, 2. 2.]
 77 |     return ratio_list[ratio_index], ratio_index
 78 | 
 79 | def filter_roidb(roidb):
 80 |     # filter the image without bounding box.
 81 |     print('before filtering, there are %d images...' % (len(roidb)))
 82 |     i = 0
 83 |     while i < len(roidb):
 84 |       if len(roidb[i]['boxes']) == 0:
 85 |         del roidb[i]
 86 |         i -= 1
 87 |       i += 1
 88 | 
 89 |     print('after filtering, there are %d images...' % (len(roidb)))
 90 |     return roidb
 91 | 
 92 | def combined_roidb(imdb_names, training=True):
 93 |   """
 94 |   Combine multiple roidbs
 95 |   """
 96 |   print(imdb_names)
 97 | 
 98 |   def get_training_roidb(imdb):
 99 |     """Returns a roidb (Region of Interest database) for use in training."""
100 |     if cfg.TRAIN.USE_FLIPPED:
101 |       print('Appending horizontally-flipped training examples...')
102 |       imdb.append_flipped_images()
103 |       print('done')
104 | 
105 |     print('Preparing training data...')
106 | 
107 |     prepare_roidb(imdb)
108 |     #ratio_index = rank_roidb_ratio(imdb)
109 |     print('done')
110 | 
111 |     return imdb.roidb
112 |   
113 |   def get_roidb(imdb_name):
114 |     imdb = get_imdb(imdb_name)
115 |     print('Loaded dataset `{:s}` for training'.format(imdb.name))
116 |     imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
117 |     print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
118 |     roidb = get_training_roidb(imdb)
119 |     return roidb
120 | 
121 |   roidbs = [get_roidb(s) for s in imdb_names.split('+')]
122 |   roidb = roidbs[0]
123 | 
124 |   if len(roidbs) > 1:
125 |     for r in roidbs[1:]:
126 |       roidb.extend(r)
127 |     tmp = get_imdb(imdb_names.split('+')[1])
128 |     imdb = datasets.imdb.imdb(imdb_names, tmp.classes)
129 |   else:
130 |     imdb = get_imdb(imdb_names)
131 | 
132 |   if training:
133 |     roidb = filter_roidb(roidb)
134 | 
135 |   ratio_list, ratio_index = rank_roidb_ratio(roidb)
136 | 
137 |   return imdb, roidb, ratio_list, ratio_index
138 | 


--------------------------------------------------------------------------------
/lib/model/roi_crop/src/roi_crop_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <stdbool.h>
  3 | #include <stdio.h>
  4 | #include "roi_crop_cuda_kernel.h"
  5 | 
  6 | #define real float
  7 | 
  8 | // this symbol will be resolved automatically from PyTorch libs
  9 | extern THCState *state;
 10 | 
 11 | // Bilinear sampling is done in BHWD (coalescing is not obvious in BDHW)
 12 | // we assume BHWD format in inputImages
 13 | // we assume BHW(YX) format on grids
 14 | 
/*
 * Forward pass of the bilinear sampler: forwards the sizes, data pointers and
 * strides of `output`, `inputImages` and `grids`, plus the current THC stream,
 * to BilinearSamplerBHWD_updateOutput_cuda_kernel.
 *
 * Returns 1 when the kernel wrapper reports success; otherwise THError is
 * raised with the message "aborting".
 *
 * NOTE(review): the dimension order of the forwarded sizes/strides must match
 * the parameter order of the kernel wrapper declared in
 * roi_crop_cuda_kernel.h, which is not visible here -- confirm before
 * reordering anything.
 */
int BilinearSamplerBHWD_updateOutput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *output){
//  THCState *state = getCutorchState(L);
//  THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
//  THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
//  THCudaTensor *output = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");

  int success = 0;
  // output sizes are passed in dim order 1, 3, 2, 0
  success = BilinearSamplerBHWD_updateOutput_cuda_kernel(THCudaTensor_size(state, output, 1),
							 THCudaTensor_size(state, output, 3),
							 THCudaTensor_size(state, output, 2),
							 THCudaTensor_size(state, output, 0),
							 // inputImages sizes are passed in dim order 1, 2, 3, 0
							 THCudaTensor_size(state, inputImages, 1),
							 THCudaTensor_size(state, inputImages, 2),
							 THCudaTensor_size(state, inputImages, 3),
							 THCudaTensor_size(state, inputImages, 0),
							 // inputImages data pointer and strides (dims 0..3)
							 THCudaTensor_data(state, inputImages),
							 THCudaTensor_stride(state, inputImages, 0),
							 THCudaTensor_stride(state, inputImages, 1),
							 THCudaTensor_stride(state, inputImages, 2),
							 THCudaTensor_stride(state, inputImages, 3),
							 // grids data pointer and strides in dim order 0, 3, 1, 2
							 THCudaTensor_data(state, grids),
							 THCudaTensor_stride(state, grids, 0),
							 THCudaTensor_stride(state, grids, 3),
							 THCudaTensor_stride(state, grids, 1),
							 THCudaTensor_stride(state, grids, 2),
							 // output data pointer and strides (dims 0..3)
							 THCudaTensor_data(state, output),
							 THCudaTensor_stride(state, output, 0),
							 THCudaTensor_stride(state, output, 1),
							 THCudaTensor_stride(state, output, 2),
							 THCudaTensor_stride(state, output, 3),
							 THCState_getCurrentStream(state));

  //check for errors
  if (!success) {
    THError("aborting");
  }
  return 1;
}
 53 | 
/*
 * Backward pass of the bilinear sampler: forwards the sizes, data pointers
 * and strides of `gradOutput`, `inputImages`, `grids`, `gradInputImages` and
 * `gradGrids`, plus the current THC stream, to
 * BilinearSamplerBHWD_updateGradInput_cuda_kernel.
 *
 * Returns 1 when the kernel wrapper reports success; otherwise THError is
 * raised with the message "aborting".
 *
 * NOTE(review): the dimension order of the forwarded sizes/strides must match
 * the parameter order of the kernel wrapper declared in
 * roi_crop_cuda_kernel.h, which is not visible here -- confirm before
 * reordering anything.
 */
int BilinearSamplerBHWD_updateGradInput_cuda(THCudaTensor *inputImages, THCudaTensor *grids, THCudaTensor *gradInputImages,
                                        THCudaTensor *gradGrids, THCudaTensor *gradOutput)
{
//  THCState *state = getCutorchState(L);
//  THCudaTensor *inputImages = (THCudaTensor *)luaT_checkudata(L, 2, "torch.CudaTensor");
//  THCudaTensor *grids = (THCudaTensor *)luaT_checkudata(L, 3, "torch.CudaTensor");
//  THCudaTensor *gradInputImages = (THCudaTensor *)luaT_checkudata(L, 4, "torch.CudaTensor");
//  THCudaTensor *gradGrids = (THCudaTensor *)luaT_checkudata(L, 5, "torch.CudaTensor");
//  THCudaTensor *gradOutput = (THCudaTensor *)luaT_checkudata(L, 6, "torch.CudaTensor");

  int success = 0;
  // gradOutput sizes are passed in dim order 1, 3, 2, 0
  success = BilinearSamplerBHWD_updateGradInput_cuda_kernel(THCudaTensor_size(state, gradOutput, 1),
							    THCudaTensor_size(state, gradOutput, 3),
							    THCudaTensor_size(state, gradOutput, 2),
							    THCudaTensor_size(state, gradOutput, 0),
							    // inputImages sizes are passed in dim order 1, 2, 3, 0
							    THCudaTensor_size(state, inputImages, 1),
							    THCudaTensor_size(state, inputImages, 2),
							    THCudaTensor_size(state, inputImages, 3),
							    THCudaTensor_size(state, inputImages, 0),
							    // inputImages data pointer and strides (dims 0..3)
							    THCudaTensor_data(state, inputImages),
							    THCudaTensor_stride(state, inputImages, 0),
							    THCudaTensor_stride(state, inputImages, 1),
							    THCudaTensor_stride(state, inputImages, 2),
							    THCudaTensor_stride(state, inputImages, 3),
							    // grids data pointer and strides in dim order 0, 3, 1, 2
							    THCudaTensor_data(state, grids),
							    THCudaTensor_stride(state, grids, 0),
							    THCudaTensor_stride(state, grids, 3),
							    THCudaTensor_stride(state, grids, 1),
							    THCudaTensor_stride(state, grids, 2),
							    // gradInputImages data pointer and strides (dims 0..3)
							    THCudaTensor_data(state, gradInputImages),
							    THCudaTensor_stride(state, gradInputImages, 0),
							    THCudaTensor_stride(state, gradInputImages, 1),
							    THCudaTensor_stride(state, gradInputImages, 2),
							    THCudaTensor_stride(state, gradInputImages, 3),
							    // gradGrids data pointer and strides in dim order 0, 3, 1, 2
							    THCudaTensor_data(state, gradGrids),
							    THCudaTensor_stride(state, gradGrids, 0),
							    THCudaTensor_stride(state, gradGrids, 3),
							    THCudaTensor_stride(state, gradGrids, 1),
							    THCudaTensor_stride(state, gradGrids, 2),
							    // gradOutput data pointer and strides (dims 0..3)
							    THCudaTensor_data(state, gradOutput),
							    THCudaTensor_stride(state, gradOutput, 0),
							    THCudaTensor_stride(state, gradOutput, 1),
							    THCudaTensor_stride(state, gradOutput, 2),
							    THCudaTensor_stride(state, gradOutput, 3),
							    THCState_getCurrentStream(state));

  //check for errors
  if (!success) {
    THError("aborting");
  }
  return 1;
}
106 | 


--------------------------------------------------------------------------------
/lib/model/rpn/rpn.py:
--------------------------------------------------------------------------------
  1 | from __future__ import absolute_import
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | 
  7 | from model.utils.config import cfg
  8 | from .proposal_layer import _ProposalLayer
  9 | from .anchor_target_layer import _AnchorTargetLayer
 10 | from model.utils.net_utils import _smooth_l1_loss
 11 | 
 12 | import numpy as np
 13 | import math
 14 | import pdb
 15 | import time
 16 | 
 17 | class _RPN(nn.Module):
 18 |     """ region proposal network """
 19 |     def __init__(self, din):
 20 |         super(_RPN, self).__init__()
 21 |         
 22 |         self.din = din  # get depth of input feature map, e.g., 512
 23 |         self.anchor_scales = cfg.ANCHOR_SCALES
 24 |         self.anchor_ratios = cfg.ANCHOR_RATIOS
 25 |         self.feat_stride = cfg.FEAT_STRIDE[0]
 26 | 
 27 |         # define the convrelu layers processing input feature map
 28 |         self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)
 29 | 
 30 |         # define bg/fg classifcation score layer
 31 |         self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors)
 32 |         self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)
 33 | 
 34 |         # define anchor box offset prediction layer
 35 |         self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4 # 4(coords) * 9 (anchors)
 36 |         self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)
 37 | 
 38 |         # define proposal layer
 39 |         self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 40 | 
 41 |         # define anchor target layer
 42 |         self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)
 43 | 
 44 |         self.rpn_loss_cls = 0
 45 |         self.rpn_loss_box = 0
 46 | 
 47 |     @staticmethod
 48 |     def reshape(x, d):
 49 |         input_shape = x.size()
 50 |         x = x.view(
 51 |             input_shape[0],
 52 |             int(d),
 53 |             int(float(input_shape[1] * input_shape[2]) / float(d)),
 54 |             input_shape[3]
 55 |         )
 56 |         return x
 57 | 
 58 |     def forward(self, base_feat, im_info, gt_boxes, num_boxes):
 59 | 
 60 |         batch_size = base_feat.size(0)
 61 | 
 62 |         # return feature map after convrelu layer
 63 |         rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
 64 |         # get rpn classification score
 65 |         rpn_cls_score = self.RPN_cls_score(rpn_conv1)  # [B, 9*2, H, W]
 66 | 
 67 |         rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
 68 |         rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
 69 |         rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)  # [B, 9*2, H, W]
 70 | 
 71 |         # get rpn offsets to the anchor boxes
 72 |         rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)  # [B, 9*4, H, W]
 73 | 
 74 |         # proposal layer
 75 |         cfg_key = 'TRAIN' if self.training else 'TEST'
 76 | 
 77 |         rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
 78 |                                  im_info, cfg_key))
 79 | 
 80 |         self.rpn_loss_cls = 0
 81 |         self.rpn_loss_box = 0
 82 | 
 83 |         # generating training labels and build the rpn loss
 84 |         if self.training:
 85 |             assert gt_boxes is not None
 86 | 
 87 |             rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
 88 |             ##################
 89 |             # rpn_data: list of length=4
 90 |             # [0]: labels [B, 1, 9*H, W]
 91 |             # [1]: bbox_targets [B, 9*4, H, W]
 92 |             # [2]: bbox_inside_weights [B, 9*4, H, W]
 93 |             # [3]: bbox_outside_weights [B, 9*4, H, W]
 94 |             ##################
 95 | 
 96 |             # compute classification loss
 97 |             rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
 98 |             rpn_label = rpn_data[0].view(batch_size, -1)
 99 |             rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
100 |           
101 |             rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)  # [B*RPN_BATCHSIZE, 2]
102 |             rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
103 |             rpn_label = Variable(rpn_label.long())  # [B*RPN_BATCHSIZE]
104 |             self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
105 |             fg_cnt = torch.sum(rpn_label.data.ne(0))
106 | 
107 |             rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]
108 | 
109 |             # compute bbox regression loss
110 |             rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
111 |             rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
112 |             rpn_bbox_targets = Variable(rpn_bbox_targets)
113 | 
114 |             self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
115 |                                                             rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])
116 | 
117 |         return rois, self.rpn_loss_cls, self.rpn_loss_box
118 | 


--------------------------------------------------------------------------------
/lib/model/csrc/cuda/nms.cu:
--------------------------------------------------------------------------------
  1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  2 | #include <ATen/ATen.h>
  3 | #include <ATen/cuda/CUDAContext.h>
  4 | 
  5 | #include <THC/THC.h>
  6 | #include <THC/THCDeviceUtils.cuh>
  7 | 
  8 | #include <vector>
  9 | #include <iostream>
 10 | 
 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 12 | 
 13 | __device__ inline float devIoU(float const * const a, float const * const b) {
 14 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 15 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 16 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 17 |   float interS = width * height;
 18 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 19 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 20 |   return interS / (Sa + Sb - interS);
 21 | }
 22 | 
 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 24 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 25 |   const int row_start = blockIdx.y;
 26 |   const int col_start = blockIdx.x;
 27 | 
 28 |   // if (row_start > col_start) return;
 29 | 
 30 |   const int row_size =
 31 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 32 |   const int col_size =
 33 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 34 | 
 35 |   __shared__ float block_boxes[threadsPerBlock * 5];
 36 |   if (threadIdx.x < col_size) {
 37 |     block_boxes[threadIdx.x * 5 + 0] =
 38 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 39 |     block_boxes[threadIdx.x * 5 + 1] =
 40 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 41 |     block_boxes[threadIdx.x * 5 + 2] =
 42 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 43 |     block_boxes[threadIdx.x * 5 + 3] =
 44 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 45 |     block_boxes[threadIdx.x * 5 + 4] =
 46 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 47 |   }
 48 |   __syncthreads();
 49 | 
 50 |   if (threadIdx.x < row_size) {
 51 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 52 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 53 |     int i = 0;
 54 |     unsigned long long t = 0;
 55 |     int start = 0;
 56 |     if (row_start == col_start) {
 57 |       start = threadIdx.x + 1;
 58 |     }
 59 |     for (i = start; i < col_size; i++) {
 60 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 61 |         t |= 1ULL << i;
 62 |       }
 63 |     }
 64 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
 65 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 66 |   }
 67 | }
 68 | 
 69 | // boxes is a N x 5 tensor
 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
 71 |   using scalar_t = float;
 72 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
 73 |   auto scores = boxes.select(1, 4);
 74 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
 75 |   auto boxes_sorted = boxes.index_select(0, order_t);
 76 | 
 77 |   int boxes_num = boxes.size(0);
 78 | 
 79 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
 80 | 
 81 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
 82 | 
 83 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
 84 | 
 85 |   unsigned long long* mask_dev = NULL;
 86 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
 87 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
 88 | 
 89 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
 90 | 
 91 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
 92 |               THCCeilDiv(boxes_num, threadsPerBlock));
 93 |   dim3 threads(threadsPerBlock);
 94 |   nms_kernel<<<blocks, threads>>>(boxes_num,
 95 |                                   nms_overlap_thresh,
 96 |                                   boxes_dev,
 97 |                                   mask_dev);
 98 | 
 99 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
100 |   THCudaCheck(cudaMemcpy(&mask_host[0],
101 |                         mask_dev,
102 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
103 |                         cudaMemcpyDeviceToHost));
104 | 
105 |   std::vector<unsigned long long> remv(col_blocks);
106 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
107 | 
108 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
109 |   int64_t* keep_out = keep.data<int64_t>();
110 | 
111 |   int num_to_keep = 0;
112 |   for (int i = 0; i < boxes_num; i++) {
113 |     int nblock = i / threadsPerBlock;
114 |     int inblock = i % threadsPerBlock;
115 | 
116 |     if (!(remv[nblock] & (1ULL << inblock))) {
117 |       keep_out[num_to_keep++] = i;
118 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
119 |       for (int j = nblock; j < col_blocks; j++) {
120 |         remv[j] |= p[j];
121 |       }
122 |     }
123 |   }
124 | 
125 |   THCudaFree(state, mask_dev);
126 |   // TODO improve this part
127 |   return std::get<0>(order_t.index({
128 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
129 |                          order_t.device(), keep.scalar_type())
130 |                      }).sort(0, false));
131 | }
132 | 


--------------------------------------------------------------------------------
/lib/model/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include "gpu_nms.hpp"
  9 | #include <vector>
 10 | #include <iostream>
 11 | 
// Evaluates a CUDA runtime call and, when it does not return cudaSuccess,
// prints the error string to std::cout. Execution continues either way:
// the error is reported, not propagated.
#define CUDA_CHECK(condition) \
  /* Code block avoids redefinition of cudaError_t error */ \
  do { \
    cudaError_t error = condition; \
    if (error != cudaSuccess) { \
      std::cout << cudaGetErrorString(error) << std::endl; \
    } \
  } while (0)

// Ceiling division of m by n. Both arguments are evaluated twice, so do not
// pass expressions with side effects.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long: one mask bit per thread of a block.
int const threadsPerBlock = sizeof(unsigned long long) * 8;
 23 | 
 24 | __device__ inline float devIoU(float const * const a, float const * const b) {
 25 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 26 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 27 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 28 |   float interS = width * height;
 29 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 30 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 31 |   return interS / (Sa + Sb - interS);
 32 | }
 33 | 
 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
 35 |                            const float *dev_boxes, unsigned long long *dev_mask) {
 36 |   const int row_start = blockIdx.y;
 37 |   const int col_start = blockIdx.x;
 38 | 
 39 |   // if (row_start > col_start) return;
 40 | 
 41 |   const int row_size =
 42 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 43 |   const int col_size =
 44 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 45 | 
 46 |   __shared__ float block_boxes[threadsPerBlock * 5];
 47 |   if (threadIdx.x < col_size) {
 48 |     block_boxes[threadIdx.x * 5 + 0] =
 49 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 50 |     block_boxes[threadIdx.x * 5 + 1] =
 51 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 52 |     block_boxes[threadIdx.x * 5 + 2] =
 53 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 54 |     block_boxes[threadIdx.x * 5 + 3] =
 55 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 56 |     block_boxes[threadIdx.x * 5 + 4] =
 57 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 58 |   }
 59 |   __syncthreads();
 60 | 
 61 |   if (threadIdx.x < row_size) {
 62 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 63 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 64 |     int i = 0;
 65 |     unsigned long long t = 0;
 66 |     int start = 0;
 67 |     if (row_start == col_start) {
 68 |       start = threadIdx.x + 1;
 69 |     }
 70 |     for (i = start; i < col_size; i++) {
 71 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 72 |         t |= 1ULL << i;
 73 |       }
 74 |     }
 75 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 76 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 77 |   }
 78 | }
 79 | 
 80 | void _set_device(int device_id) {
 81 |   int current_device;
 82 |   CUDA_CHECK(cudaGetDevice(&current_device));
 83 |   if (current_device == device_id) {
 84 |     return;
 85 |   }
 86 |   // The call to cudaSetDevice must come before any calls to Get, which
 87 |   // may perform initialization using the GPU.
 88 |   CUDA_CHECK(cudaSetDevice(device_id));
 89 | }
 90 | 
 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
 92 |           int boxes_dim, float nms_overlap_thresh, int device_id) {
 93 |   _set_device(device_id);
 94 | 
 95 |   float* boxes_dev = NULL;
 96 |   unsigned long long* mask_dev = NULL;
 97 | 
 98 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 99 | 
100 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
101 |                         boxes_num * boxes_dim * sizeof(float)));
102 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
103 |                         boxes_host,
104 |                         boxes_num * boxes_dim * sizeof(float),
105 |                         cudaMemcpyHostToDevice));
106 | 
107 |   CUDA_CHECK(cudaMalloc(&mask_dev,
108 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
109 | 
110 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 |               DIVUP(boxes_num, threadsPerBlock));
112 |   dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 |                                   nms_overlap_thresh,
115 |                                   boxes_dev,
116 |                                   mask_dev);
117 | 
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 |                         mask_dev,
121 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
122 |                         cudaMemcpyDeviceToHost));
123 | 
124 |   std::vector<unsigned long long> remv(col_blocks);
125 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 | 
127 |   int num_to_keep = 0;
128 |   for (int i = 0; i < boxes_num; i++) {
129 |     int nblock = i / threadsPerBlock;
130 |     int inblock = i % threadsPerBlock;
131 | 
132 |     if (!(remv[nblock] & (1ULL << inblock))) {
133 |       keep_out[num_to_keep++] = i;
134 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
135 |       for (int j = nblock; j < col_blocks; j++) {
136 |         remv[j] |= p[j];
137 |       }
138 |     }
139 |   }
140 |   *num_out = num_to_keep;
141 | 
142 |   CUDA_CHECK(cudaFree(boxes_dev));
143 |   CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | 


--------------------------------------------------------------------------------
/lib/model/utils/fsod_logger.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import cv2
  4 | from torch.utils.tensorboard import SummaryWriter
  5 | from torchvision.utils import make_grid
  6 | 
  7 | 
  8 | class FSODInferenceLogger:
  9 |     def __init__(self, log_dir):
 10 |         self.writer = SummaryWriter(log_dir)
 11 | 
 12 |     def write(self, save_step, gt, support, predict, save_im=False):
 13 |         # self._add_scalars(save_step, train_log)
 14 |         if save_im:
 15 |             self._add_images(save_step, gt, support, predict)
 16 |   
 17 |     def close(self):
 18 |         """Close the writer.
 19 |         """
 20 |         self.writer.close()
 21 | 
 22 |     def _add_images(self, save_step, gt, support, predict):
 23 |         # gt = gt.cpu()
 24 |         # support = support.cpu()
 25 |         # predict = predict.cpu()
 26 |         # H, W = gt[0].size(1), gt[0].size(2)
 27 |         # support_img = support[i].permute(1, 2, 0).numpy()
 28 |         # support_img = support_img[:, :, ::-1].copy()
 29 |         gt_grid = make_grid(gt, nrow=1, normalize=True, scale_each=True, pad_value=1)
 30 |         support_grid = make_grid(support, nrow=1, normalize=True, scale_each=True, pad_value=1)
 31 |         pred_grid = make_grid(predict, nrow=1, normalize=True, scale_each=True, pad_value=1)
 32 | 
 33 |         grid = torch.cat((gt_grid, support_grid, pred_grid), dim=-1)
 34 |         self.writer.add_image('gt&pred', grid, save_step)
 35 | 
 36 | 
 37 | class FSODLogger:
 38 |     def __init__(self, log_dir, train_shot=5):
 39 |         self.writer = SummaryWriter(log_dir)
 40 |         self.train_shot = train_shot
 41 | 
 42 |     def write(self, save_step, train_log, query=None, support=None, boxes=None, save_im=False):
 43 |         self._add_scalars(save_step, train_log)
 44 |         if save_im:
 45 |             self._add_images(save_step, query, support, boxes)
 46 | 
 47 |     def close(self):
 48 |         """Close the writer.
 49 |         """
 50 |         self.writer.close()
 51 | 
 52 |     def _add_scalars(self, save_step, train_log):
 53 |       for key in train_log.keys():
 54 |         self.writer.add_scalars(key, {'train': train_log[key]}, save_step)
 55 | 
 56 |     def _add_images(self, save_step, query, supports, boxes):
 57 |         query = query.cpu()
 58 |         support = supports[:, 0, :, :, :].cpu()
 59 |         neg_support = supports[:, self.train_shot, :, :, :].cpu()
 60 |         boxes = boxes.cpu()
 61 |         query_ims = []
 62 |         support_ims = []
 63 |         neg_support_ims = []
 64 |         H, W = query[0].size(1), query[0].size(2)
 65 |         for i in range(query.size(0)):
 66 |             query_im = query[i].permute(1, 2, 0).numpy()
 67 |             support_im = support[i].permute(1, 2, 0).numpy()
 68 |             neg_support_im = neg_support[i].permute(1, 2, 0).numpy()
 69 |             query_im = query_im[:, :, ::-1].copy()
 70 |             support_im = support_im[:, :, ::-1].copy()
 71 |             neg_support_im = neg_support_im[:, :, ::-1].copy()
 72 |             boxes_of_one_img = boxes[i]
 73 | 
 74 |             for ii in range(boxes_of_one_img.size(0)):
 75 |                 box = boxes_of_one_img[ii]
 76 |                 if box[4] == 0:
 77 |                     continue
 78 |                 x = box[0]
 79 |                 y = box[1]
 80 |                 w = box[2] - box[0]
 81 |                 h = box[3] - box[1]
 82 |                 query_im = cv2.rectangle(np.array(query_im), (int(x), int(y)), (int(x+w), int(y+h)), (220, 0, 50), 2)
 83 | 
 84 |             query_im = torch.from_numpy(query_im).permute(2,0,1)
 85 |             support_im = cv2.resize(support_im, (W, H), interpolation=cv2.INTER_LINEAR)
 86 |             neg_support_im = cv2.resize(neg_support_im, (W, H), interpolation=cv2.INTER_LINEAR)
 87 |             support_im = torch.from_numpy(support_im).permute(2,0,1)
 88 |             neg_support_im = torch.from_numpy(neg_support_im).permute(2,0,1)
 89 | 
 90 |             query_ims += [query_im]
 91 |             support_ims += [support_im]
 92 |             neg_support_ims += [neg_support_im]
 93 | 
 94 |         query_ims = torch.stack(query_ims, 0)
 95 |         support_ims = torch.stack(support_ims, 0)
 96 |         neg_support_ims = torch.stack(neg_support_ims, 0)
 97 | 
 98 |         train_query = make_grid(query_ims, nrow=1, normalize=True, scale_each=True, pad_value=1)
 99 |         train_support = make_grid(support_ims, nrow=1, normalize=True, scale_each=True, pad_value=1)
100 |         train_support_2 = make_grid(neg_support_ims, nrow=1, normalize=True, scale_each=True, pad_value=1)
101 |         grid = torch.cat((train_query, train_support, train_support_2), dim=-1)
102 |         self.writer.add_image('train', grid, save_step)
103 | 
class BaseLogger:
    """TensorBoard logger that writes a ground-truth/prediction grid under the
    tag 'gt&pred' and the support images under the tag 'support'.
    """

    def __init__(self, log_dir):
        self.writer = SummaryWriter(log_dir)

    def write(self, save_step, gt, support, predict):
        """Log the image batches at global step ``save_step``."""
        self._add_images(save_step, gt, support, predict)

    def close(self):
        """Close the writer.
        """
        self.writer.close()

    def _add_images(self, save_step, gt, support, predict):
        """Build one single-column grid per batch and log them.

        ``gt`` and ``predict`` grids are concatenated along the last (width)
        axis into one image; the ``support`` grid is logged separately.
        """
        gt_grid = make_grid(gt, nrow=1, normalize=True, scale_each=True, pad_value=1)
        support_grid = make_grid(support, nrow=1, normalize=True, scale_each=True, pad_value=1)
        pred_grid = make_grid(predict, nrow=1, normalize=True, scale_each=True, pad_value=1)

        gt_pred_grid = torch.cat((gt_grid, pred_grid), dim=-1)
        # Pass the step through: without it every call logs at the default
        # global step and successive images cannot be told apart.
        self.writer.add_image('gt&pred', gt_pred_grid, save_step)
        self.writer.add_image('support', support_grid, save_step)
131 | 
132 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/allcls_fs_loader.py:
--------------------------------------------------------------------------------
  1 | import torch.utils.data as data
  2 | import torch
  3 | import numpy as np
  4 | import random
  5 | import cv2
  6 | import os
  7 | from PIL import Image
  8 | from pathlib import Path
  9 | from torch.utils.data.sampler import Sampler
 10 | from scipy.misc import imread
 11 | from model.utils.config import cfg
 12 | from model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
 13 | from roi_data_layer.minibatch import get_minibatch, get_minibatch
 14 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
 15 | 
 16 | 
 17 | class ALLCLSFSLoader(data.Dataset):
 18 |     def __init__(self, imdb, roidb, ratio_list, ratio_index, support_dir,
 19 |                 batch_size, num_classes, num_shot=5, training=True, normalize=None):
 20 |         self._imdb = imdb
 21 |         self._roidb = roidb
 22 |         self._num_classes = num_classes
 23 |         self.trim_height = cfg.TRAIN.TRIM_HEIGHT
 24 |         self.trim_width = cfg.TRAIN.TRIM_WIDTH
 25 |         self.max_num_box = cfg.MAX_NUM_GT_BOXES
 26 |         self.training = training
 27 |         self.normalize = normalize
 28 |         self.ratio_list = ratio_list
 29 |         self.ratio_index = ratio_index
 30 |         self.batch_size = batch_size
 31 |         self.data_size = len(self.ratio_list)
 32 | 
 33 |         # given the ratio_list, we want to make the ratio same for each batch.
 34 |         self.ratio_list_batch = torch.Tensor(self.data_size).zero_()
 35 |         num_batch = int(np.ceil(len(ratio_index) / batch_size))
 36 |         for i in range(num_batch):
 37 |             left_idx = i*batch_size
 38 |             right_idx = min((i+1)*batch_size-1, self.data_size-1)
 39 | 
 40 |             if ratio_list[right_idx] < 1:
 41 |                 # for ratio < 1, we preserve the leftmost in each batch.
 42 |                 target_ratio = ratio_list[left_idx]
 43 |             elif ratio_list[left_idx] > 1:
 44 |                 # for ratio > 1, we preserve the rightmost in each batch.
 45 |                 target_ratio = ratio_list[right_idx]
 46 |             else:
 47 |                 # for ratio cross 1, we make it to be 1.
 48 |                 target_ratio = 1
 49 | 
 50 |             self.ratio_list_batch[left_idx:(right_idx+1)] = target_ratio
 51 | 
 52 |         self.support_pool = [[] for i in range(self._num_classes)]
 53 |         self._label_to_cls_name = dict(list(zip(list(range(self._num_classes)), self._imdb.classes)))
 54 |         for _label in range(1, self._num_classes):
 55 |             cls_name = self._label_to_cls_name[_label]
 56 |             cls_dir = os.path.join(support_dir, cls_name)
 57 |             support_im_paths = [str(_p) for _p in list(Path(cls_dir).glob('*.jpg'))]
 58 |             if len(support_im_paths) == 0:
 59 |                 raise Exception(f'support data not found in {cls_dir}')
 60 |             self.support_pool[_label].extend(support_im_paths)
 61 | 
 62 |         self.support_im_size = 320
 63 |         self.testing_shot = num_shot
 64 | 
 65 | 
 66 |     def __getitem__(self, index):
 67 |         index_ratio = index
 68 | 
 69 |         # get the anchor index for current sample index
 70 |         # here we set the anchor index to the last one
 71 |         # sample in this group
 72 |         minibatch_db = [self._roidb[index_ratio]]
 73 |         blobs = get_minibatch(minibatch_db)
 74 |         data = torch.from_numpy(blobs['data'])
 75 |         im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
 76 |         # we need to random shuffle the bounding box.
 77 |         data_height, data_width = data.size(1), data.size(2)
 78 | 
 79 |         data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
 80 |         im_info = im_info.view(3)
 81 | 
 82 |         # gt_boxes = torch.FloatTensor([1,1,1,1,1])
 83 |         gt_boxes = torch.from_numpy(blobs['gt_boxes'])
 84 | 
 85 |         all_cls_gt_boxes = gt_boxes.clone()
 86 | 
 87 |         cur_cls_id_list = []
 88 |         for i in range(gt_boxes.size(0)):
 89 |             if gt_boxes[i, 4] not in cur_cls_id_list:
 90 |                 cur_cls_id_list.append(gt_boxes[i, 4])
 91 |         random.seed(0)
 92 |         chosen_cls = random.sample(cur_cls_id_list, k=1)[0]
 93 | 
 94 |         new_gt_boxes = []
 95 |         for i in range(gt_boxes.size(0)):
 96 |             if gt_boxes[i, 4] == chosen_cls:
 97 |                 new_gt_boxes.append([gt_boxes[i, 0], gt_boxes[i, 1], gt_boxes[i, 2], gt_boxes[i, 3], chosen_cls])
 98 |         gt_boxes = torch.from_numpy(np.asarray(new_gt_boxes))
 99 | 
100 |         num_boxes = 0
101 | 
102 |         # get supports
103 |         support_data_all = np.zeros((self.testing_shot, 3, self.support_im_size, self.support_im_size), dtype=np.float32)
104 |         current_gt_class_id = int(gt_boxes[0][4])
105 |         pool = self.support_pool[current_gt_class_id]
106 | 
107 |         random.seed(index)
108 |         selected_supports = random.sample(pool, k=self.testing_shot)
109 |         
110 |         for i, _path in enumerate(selected_supports):
111 |             support_im = imread(_path)[:,:,::-1]  # rgb -> bgr
112 |             target_size = np.min(support_im.shape[0:2])  # don't change the size
113 |             support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
114 |             _h, _w = support_im.shape[0], support_im.shape[1]
115 |             if _h > _w:
116 |                 resize_scale = float(self.support_im_size) / float(_h)
117 |                 unfit_size = int(_w * resize_scale)
118 |                 support_im = cv2.resize(support_im, (unfit_size, self.support_im_size), interpolation=cv2.INTER_LINEAR)
119 |             else:
120 |                 resize_scale = float(self.support_im_size) / float(_w)
121 |                 unfit_size = int(_h * resize_scale)
122 |                 support_im = cv2.resize(support_im, (self.support_im_size, unfit_size), interpolation=cv2.INTER_LINEAR)
123 |             h, w = support_im.shape[0], support_im.shape[1]
124 |             support_data_all[i, :, :h, :w] = np.transpose(support_im, (2, 0, 1)) 
125 |         supports = torch.from_numpy(support_data_all)
126 | 
127 | 
128 |         return data, im_info, gt_boxes, num_boxes, supports, all_cls_gt_boxes
129 | 
130 |     def __len__(self):
131 |         return len(self._roidb)


--------------------------------------------------------------------------------
/lib/model/nms/src/nms_cuda_kernel.cu:
--------------------------------------------------------------------------------
  1 | // ------------------------------------------------------------------
  2 | // Faster R-CNN
  3 | // Copyright (c) 2015 Microsoft
  4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
  5 | // Written by Shaoqing Ren
  6 | // ------------------------------------------------------------------
  7 | 
  8 | #include <stdbool.h>
  9 | #include <stdio.h>
 10 | #include <vector>
 11 | #include <iostream>
 12 | #include "nms_cuda_kernel.h"
 13 | 
 14 | #define CUDA_WARN(XXX) \
 15 |     do { if (XXX != cudaSuccess) std::cout << "CUDA Error: " << \
 16 |         cudaGetErrorString(XXX) << ", at line " << __LINE__ \
 17 | << std::endl; cudaDeviceSynchronize(); } while (0)
 18 | 
 19 | #define CUDA_CHECK(condition) \
 20 |   /* Code block avoids redefinition of cudaError_t error */ \
 21 |   do { \
 22 |     cudaError_t error = condition; \
 23 |     if (error != cudaSuccess) { \
 24 |       std::cout << cudaGetErrorString(error) << std::endl; \
 25 |     } \
 26 |   } while (0)
 27 | 
 28 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 29 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
 30 | 
 31 | __device__ inline float devIoU(float const * const a, float const * const b) {
 32 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
 33 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
 34 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
 35 |   float interS = width * height;
 36 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 37 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 38 |   return interS / (Sa + Sb - interS);
 39 | }
 40 | 
 41 | __global__ void nms_kernel(int n_boxes, float nms_overlap_thresh,
 42 |                            float *dev_boxes, unsigned long long *dev_mask) {
 43 |   const int row_start = blockIdx.y;
 44 |   const int col_start = blockIdx.x;
 45 | 
 46 |   // if (row_start > col_start) return;
 47 | 
 48 |   const int row_size =
 49 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
 50 |   const int col_size =
 51 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
 52 | 
 53 |   __shared__ float block_boxes[threadsPerBlock * 5];
 54 |   if (threadIdx.x < col_size) {
 55 |     block_boxes[threadIdx.x * 5 + 0] =
 56 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
 57 |     block_boxes[threadIdx.x * 5 + 1] =
 58 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
 59 |     block_boxes[threadIdx.x * 5 + 2] =
 60 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
 61 |     block_boxes[threadIdx.x * 5 + 3] =
 62 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
 63 |     block_boxes[threadIdx.x * 5 + 4] =
 64 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
 65 |   }
 66 |   __syncthreads();
 67 | 
 68 |   if (threadIdx.x < row_size) {
 69 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
 70 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
 71 |     int i = 0;
 72 |     unsigned long long t = 0;
 73 |     int start = 0;
 74 |     if (row_start == col_start) {
 75 |       start = threadIdx.x + 1;
 76 |     }
 77 |     for (i = start; i < col_size; i++) {
 78 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
 79 |         t |= 1ULL << i;
 80 |       }
 81 |     }
 82 |     const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
 83 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
 84 |   }
 85 | }
 86 | 
 87 | void nms_cuda_compute(int* keep_out, int *num_out, float* boxes_host, int boxes_num,
 88 |           int boxes_dim, float nms_overlap_thresh) {
 89 | 
 90 |   float* boxes_dev = NULL;
 91 |   unsigned long long* mask_dev = NULL;
 92 | 
 93 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
 94 | 
 95 |   CUDA_CHECK(cudaMalloc(&boxes_dev,
 96 |                         boxes_num * boxes_dim * sizeof(float)));
 97 |   CUDA_CHECK(cudaMemcpy(boxes_dev,
 98 |                         boxes_host,
 99 |                         boxes_num * boxes_dim * sizeof(float),
100 |                         cudaMemcpyHostToDevice));
101 | 
102 |   CUDA_CHECK(cudaMalloc(&mask_dev,
103 |                         boxes_num * col_blocks * sizeof(unsigned long long)));
104 | 
105 |   dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
106 |               DIVUP(boxes_num, threadsPerBlock));
107 |   dim3 threads(threadsPerBlock);
108 | 
109 |   // printf("i am at line %d\n", boxes_num);
110 |   // printf("i am at line %d\n", boxes_dim);  
111 | 
112 |   nms_kernel<<<blocks, threads>>>(boxes_num,
113 |                                   nms_overlap_thresh,
114 |                                   boxes_dev,
115 |                                   mask_dev);
116 | 
117 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
118 |   CUDA_CHECK(cudaMemcpy(&mask_host[0],
119 |                         mask_dev,
120 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
121 |                         cudaMemcpyDeviceToHost));
122 | 
123 |   std::vector<unsigned long long> remv(col_blocks);
124 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
125 | 
126 |   // we need to create a memory for keep_out on cpu
127 |   // otherwise, the following code cannot run
128 | 
129 |   int* keep_out_cpu = new int[boxes_num];
130 | 
131 |   int num_to_keep = 0;
132 |   for (int i = 0; i < boxes_num; i++) {
133 |     int nblock = i / threadsPerBlock;
134 |     int inblock = i % threadsPerBlock;
135 | 
136 |     if (!(remv[nblock] & (1ULL << inblock))) {
137 |       // orignal: keep_out[num_to_keep++] = i;
138 |       keep_out_cpu[num_to_keep++] = i;
139 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
140 |       for (int j = nblock; j < col_blocks; j++) {
141 |         remv[j] |= p[j];
142 |       }
143 |     }
144 |   }
145 | 
146 |   // copy keep_out_cpu to keep_out on gpu
147 |   CUDA_WARN(cudaMemcpy(keep_out, keep_out_cpu, boxes_num * sizeof(int),cudaMemcpyHostToDevice));  
148 | 
149 |   // *num_out = num_to_keep;
150 | 
151 |   // original: *num_out = num_to_keep;
152 |   // copy num_to_keep to num_out on gpu
153 | 
154 |   CUDA_WARN(cudaMemcpy(num_out, &num_to_keep, 1 * sizeof(int),cudaMemcpyHostToDevice));  
155 | 
156 |   // release cuda memory
157 |   CUDA_CHECK(cudaFree(boxes_dev));
158 |   CUDA_CHECK(cudaFree(mask_dev));
159 |   // release cpu memory
160 |   delete []keep_out_cpu;
161 | }
162 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/inference_loader.py:
--------------------------------------------------------------------------------
"""Dataset used at test time: pairs each query image with a fixed set of support images.
"""
  3 | import numpy as np
  4 | import random
  5 | import time
  6 | import pdb
  7 | import cv2
  8 | import torch.utils.data as data
  9 | import torch
 10 | import os
 11 | from pathlib import Path
 12 | from PIL import Image
 13 | from scipy.misc import imread
 14 | 
 15 | from roi_data_layer.minibatch import get_minibatch
 16 | from model.utils.config import cfg
 17 | from model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
 18 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
 19 | 
 20 | from pycocotools.coco import COCO
 21 | 
 22 | 
 23 | class InferenceLoader(data.Dataset):
 24 |     def __init__(self, epi_random_seed, imdb, roidb, ratio_list, ratio_index, support_dir,
 25 |                 batch_size, num_classes, num_shot=5, training=True, normalize=None):
 26 |         self._imdb = imdb
 27 |         self._roidb = roidb
 28 |         self._num_classes = num_classes
 29 |         self.trim_height = cfg.TRAIN.TRIM_HEIGHT
 30 |         self.trim_width = cfg.TRAIN.TRIM_WIDTH
 31 |         self.max_num_box = cfg.MAX_NUM_GT_BOXES
 32 |         self.training = training
 33 |         self.normalize = normalize
 34 |         self.ratio_list = ratio_list
 35 |         self.ratio_index = ratio_index
 36 |         self.batch_size = batch_size
 37 |         self.data_size = len(self.ratio_list)
 38 |         self.epi_random_seed = epi_random_seed
 39 |         #############################################################################
 40 |         # roidb:
 41 |         # {'width': 640, 'height': 484, 'boxes': array([[ 58, 152, 268, 243]], dtype=uint16), 
 42 |         # 'gt_classes': array([79], dtype=int32), flipped': False, 'seg_areas': array([12328.567], dtype=float32),
 43 |         # 'img_id': 565198, 'image': '/home/tungi/FSOD/data/coco/images/val2014/COCO_val2014_000000565198.jpg', 
 44 |         # 'max_classes': array([79]), 'max_overlaps': array([1.], dtype=float32), 'need_crop': 0}
 45 | 
 46 |         # name_to_coco_cls_ind = {'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 'airplane': 5, 'bus': 6, 'train': 7,
 47 |         #  	'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 'parking meter': 14, 'bench': 15,
 48 |         # 	'bird': 16, 'cat': 17, 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24,
 49 |         # 	'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 'frisbee': 34, 'skis': 35,
 50 |         # 	'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 'skateboard': 41, 'surfboard': 42,
 51 |         # 	'tennis racket': 43, 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 
 52 |         # 	'banana': 52, 'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58, 'pizza': 59, 
 53 |         # 	'donut': 60, 'cake': 61, 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 'dining table': 67, 'toilet': 70, 'tv': 72,
 54 |         # 	'laptop': 73, 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 
 55 |         # 	'sink': 81, 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 'toothbrush': 90}
 56 |         #############################################################################
 57 | 
 58 |         self.support_im_size = 320
 59 |         self.testing_shot = num_shot
 60 | 
 61 |         self.support_pool = [[] for i in range(self._num_classes)]
 62 |         self._label_to_cls_name = dict(list(zip(list(range(self._num_classes)), self._imdb.classes)))
 63 |         for _label in range(1, self._num_classes):
 64 |             cls_name = self._label_to_cls_name[_label]
 65 |             cls_dir = os.path.join(support_dir, cls_name)
 66 |             support_im_paths = [str(_p) for _p in list(Path(cls_dir).glob('*.jpg'))]
 67 |             if len(support_im_paths) == 0:
 68 |                 raise Exception(f'support data not found in {cls_dir}')
 69 |             random.seed(epi_random_seed)  # fix the shots
 70 |             support_im_paths = random.sample(support_im_paths, k=self.testing_shot)
 71 |             self.support_pool[_label].extend(support_im_paths)
 72 | 
 73 | 
 74 |     def __getitem__(self, index):
 75 |         # testing
 76 |         index_ratio = index
 77 |         # though it is called minibatch, in fact it contains only one img here
 78 |         minibatch_db = [self._roidb[index_ratio]]
 79 | 
 80 |         # load query
 81 |         blobs = get_minibatch(minibatch_db)
 82 |         data = torch.from_numpy(blobs['data'])
 83 |         im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
 84 |         data_height, data_width = data.size(1), data.size(2)
 85 |         data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
 86 |         im_info = im_info.view(3)
 87 |         gt_boxes = torch.from_numpy(blobs['gt_boxes'])
 88 |         num_boxes = gt_boxes.size(0)
 89 |         
 90 |         # get supports
 91 |         support_data_all = np.zeros((self.testing_shot, 3, self.support_im_size, self.support_im_size), dtype=np.float32)
 92 |         current_gt_class_id = int(gt_boxes[0][4])
 93 |         selected_supports = self.support_pool[current_gt_class_id]
 94 |         
 95 |         for i, _path in enumerate(selected_supports):
 96 |             support_im = imread(_path)[:,:,::-1]  # rgb -> bgr
 97 |             target_size = np.min(support_im.shape[0:2])  # don't change the size
 98 |             support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
 99 |             _h, _w = support_im.shape[0], support_im.shape[1]
100 |             if _h > _w:
101 |                 resize_scale = float(self.support_im_size) / float(_h)
102 |                 unfit_size = int(_w * resize_scale)
103 |                 support_im = cv2.resize(support_im, (unfit_size, self.support_im_size), interpolation=cv2.INTER_LINEAR)
104 |             else:
105 |                 resize_scale = float(self.support_im_size) / float(_w)
106 |                 unfit_size = int(_h * resize_scale)
107 |                 support_im = cv2.resize(support_im, (self.support_im_size, unfit_size), interpolation=cv2.INTER_LINEAR)
108 |             h, w = support_im.shape[0], support_im.shape[1]
109 |             support_data_all[i, :, :h, :w] = np.transpose(support_im, (2, 0, 1)) 
110 |         supports = torch.from_numpy(support_data_all)
111 | 
112 | 
113 |         return data, im_info, gt_boxes, num_boxes, supports
114 | 
115 |     def __len__(self):
116 |         return len(self._roidb)


--------------------------------------------------------------------------------
/lib/model/utils/net_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.autograd import Variable
  5 | import numpy as np
  6 | import torchvision.models as models
  7 | from model.utils.config import cfg
  8 | import cv2
  9 | import pdb
 10 | import random
 11 | 
 12 | def save_net(fname, net):
 13 |     import h5py
 14 |     h5f = h5py.File(fname, mode='w')
 15 |     for k, v in net.state_dict().items():
 16 |         h5f.create_dataset(k, data=v.cpu().numpy())
 17 | 
 18 | def load_net(fname, net):
 19 |     import h5py
 20 |     h5f = h5py.File(fname, mode='r')
 21 |     for k, v in net.state_dict().items():
 22 |         param = torch.from_numpy(np.asarray(h5f[k]))
 23 |         v.copy_(param)
 24 | 
 25 | def weights_normal_init(model, dev=0.01):
 26 |     if isinstance(model, list):
 27 |         for m in model:
 28 |             weights_normal_init(m, dev)
 29 |     else:
 30 |         for m in model.modules():
 31 |             if isinstance(m, nn.Conv2d):
 32 |                 m.weight.data.normal_(0.0, dev)
 33 |             elif isinstance(m, nn.Linear):
 34 |                 m.weight.data.normal_(0.0, dev)
 35 | 
 36 | 
 37 | def clip_gradient(model, clip_norm):
 38 |     """Computes a gradient clipping coefficient based on gradient norm."""
 39 |     totalnorm = 0
 40 |     for p in model.parameters():
 41 |         if p.requires_grad and p.grad is not None:
 42 |             modulenorm = p.grad.norm()
 43 |             totalnorm += modulenorm ** 2
 44 |     totalnorm = torch.sqrt(totalnorm).item()
 45 |     norm = (clip_norm / max(totalnorm, clip_norm))
 46 |     for p in model.parameters():
 47 |         if p.requires_grad and p.grad is not None:
 48 |             p.grad.mul_(norm)
 49 | 
 50 | def vis_detections(im, class_name, dets, thresh=0.8):
 51 |     """Visual debugging of detections."""
 52 |     for i in range(np.minimum(10, dets.shape[0])):
 53 |         bbox = tuple(int(np.round(x)) for x in dets[i, :4])
 54 |         score = dets[i, -1]
 55 |         if score > thresh:
 56 |             cv2.rectangle(im, bbox[0:2], bbox[2:4], (0, 204, 0), 2)
 57 |             cv2.putText(im, '%s: %.3f' % (class_name, score), (bbox[0], bbox[1] + 15), cv2.FONT_HERSHEY_PLAIN,
 58 |                         1.0, (0, 0, 255), thickness=1)
 59 |     return im
 60 | 
 61 | 
 62 | def adjust_learning_rate(optimizer, decay=0.1):
 63 |     """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs"""
 64 |     for param_group in optimizer.param_groups:
 65 |         param_group['lr'] = decay * param_group['lr']
 66 | 
 67 | 
 68 | def save_checkpoint(state, filename):
 69 |     torch.save(state, filename)
 70 | 
 71 | def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights, sigma=1.0, dim=[1]):
 72 | 
 73 |     sigma_2 = sigma ** 2
 74 |     box_diff = bbox_pred - bbox_targets
 75 |     in_box_diff = bbox_inside_weights * box_diff
 76 |     abs_in_box_diff = torch.abs(in_box_diff)
 77 |     smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
 78 |     in_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
 79 |                   + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
 80 |     out_loss_box = bbox_outside_weights * in_loss_box
 81 |     loss_box = out_loss_box
 82 |     for i in sorted(dim, reverse=True):
 83 |       loss_box = loss_box.sum(i)
 84 |     loss_box = loss_box.mean()
 85 |     return loss_box
 86 | 
 87 | def _crop_pool_layer(bottom, rois, max_pool=True):
 88 |     # code modified from
 89 |     # https://github.com/ruotianluo/pytorch-faster-rcnn
 90 |     # implement it using stn
 91 |     # box to affine
 92 |     # input (x1,y1,x2,y2)
 93 |     """
 94 |     [  x2-x1             x1 + x2 - W + 1  ]
 95 |     [  -----      0      ---------------  ]
 96 |     [  W - 1                  W - 1       ]
 97 |     [                                     ]
 98 |     [           y2-y1    y1 + y2 - H + 1  ]
 99 |     [    0      -----    ---------------  ]
100 |     [           H - 1         H - 1      ]
101 |     """
102 |     rois = rois.detach()
103 |     batch_size = bottom.size(0)
104 |     D = bottom.size(1)
105 |     H = bottom.size(2)
106 |     W = bottom.size(3)
107 |     roi_per_batch = rois.size(0) / batch_size
108 |     x1 = rois[:, 1::4] / 16.0
109 |     y1 = rois[:, 2::4] / 16.0
110 |     x2 = rois[:, 3::4] / 16.0
111 |     y2 = rois[:, 4::4] / 16.0
112 | 
113 |     height = bottom.size(2)
114 |     width = bottom.size(3)
115 | 
116 |     # affine theta
117 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
118 |     theta = torch.cat([\
119 |       (x2 - x1) / (width - 1),
120 |       zero,
121 |       (x1 + x2 - width + 1) / (width - 1),
122 |       zero,
123 |       (y2 - y1) / (height - 1),
124 |       (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
125 | 
126 |     if max_pool:
127 |       pre_pool_size = cfg.POOLING_SIZE * 2
128 |       grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size)))
129 |       bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
130 |                                                                 .contiguous().view(-1, D, H, W)
131 |       crops = F.grid_sample(bottom, grid)
132 |       crops = F.max_pool2d(crops, 2, 2)
133 |     else:
134 |       grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE)))
135 |       bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\
136 |                                                                 .contiguous().view(-1, D, H, W)
137 |       crops = F.grid_sample(bottom, grid)
138 | 
139 |     return crops, grid
140 | 
141 | def _affine_grid_gen(rois, input_size, grid_size):
142 | 
143 |     rois = rois.detach()
144 |     x1 = rois[:, 1::4] / 16.0
145 |     y1 = rois[:, 2::4] / 16.0
146 |     x2 = rois[:, 3::4] / 16.0
147 |     y2 = rois[:, 4::4] / 16.0
148 | 
149 |     height = input_size[0]
150 |     width = input_size[1]
151 | 
152 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
153 |     theta = torch.cat([\
154 |       (x2 - x1) / (width - 1),
155 |       zero,
156 |       (x1 + x2 - width + 1) / (width - 1),
157 |       zero,
158 |       (y2 - y1) / (height - 1),
159 |       (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
160 | 
161 |     grid = F.affine_grid(theta, torch.Size((rois.size(0), 1, grid_size, grid_size)))
162 | 
163 |     return grid
164 | 
165 | def _affine_theta(rois, input_size):
166 | 
167 |     rois = rois.detach()
168 |     x1 = rois[:, 1::4] / 16.0
169 |     y1 = rois[:, 2::4] / 16.0
170 |     x2 = rois[:, 3::4] / 16.0
171 |     y2 = rois[:, 4::4] / 16.0
172 | 
173 |     height = input_size[0]
174 |     width = input_size[1]
175 | 
176 |     zero = Variable(rois.data.new(rois.size(0), 1).zero_())
177 | 
178 |     # theta = torch.cat([\
179 |     #   (x2 - x1) / (width - 1),
180 |     #   zero,
181 |     #   (x1 + x2 - width + 1) / (width - 1),
182 |     #   zero,
183 |     #   (y2 - y1) / (height - 1),
184 |     #   (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3)
185 | 
186 |     theta = torch.cat([\
187 |       (y2 - y1) / (height - 1),
188 |       zero,
189 |       (y1 + y2 - height + 1) / (height - 1),
190 |       zero,
191 |       (x2 - x1) / (width - 1),
192 |       (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3)
193 | 
194 |     return theta
195 | 


--------------------------------------------------------------------------------
/lib/roi_data_layer/multiway_loader.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import random
  3 | import time
  4 | import pdb
  5 | import cv2
  6 | import torch.utils.data as data
  7 | import torch
  8 | import os
  9 | from pathlib import Path
 10 | from PIL import Image
 11 | from scipy.misc import imread
 12 | 
 13 | from roi_data_layer.minibatch import get_minibatch, get_minibatch
 14 | from model.utils.config import cfg
 15 | from model.rpn.bbox_transform import bbox_transform_inv, clip_boxes
 16 | from model.utils.blob import prep_im_for_blob, im_list_to_blob
 17 | 
 18 | from pycocotools.coco import COCO
 19 | 
 20 | 
 21 | class MultiwayLoader(data.Dataset):
 22 |     def __init__(self, epi_random_seed, imdb, roidb, ratio_list, ratio_index, support_dir,
 23 |                 batch_size, num_classes, num_shot=5, training=True, normalize=None, num_way=1):
 24 |         self._imdb = imdb
 25 |         self._roidb = roidb
 26 |         self._num_classes = num_classes
 27 |         self.trim_height = cfg.TRAIN.TRIM_HEIGHT
 28 |         self.trim_width = cfg.TRAIN.TRIM_WIDTH
 29 |         self.max_num_box = cfg.MAX_NUM_GT_BOXES
 30 |         self.training = training
 31 |         self.normalize = normalize
 32 |         self.ratio_list = ratio_list
 33 |         self.ratio_index = ratio_index
 34 |         self.batch_size = batch_size
 35 |         self.data_size = len(self.ratio_list)
 36 |         self.epi_random_seed = epi_random_seed
 37 |         self.num_way = num_way
 38 |         #############################################################################
 39 |         # roidb:
 40 |         # {'width': 640, 'height': 484, 'boxes': array([[ 58, 152, 268, 243]], dtype=uint16), 
 41 |         # 'gt_classes': array([79], dtype=int32), flipped': False, 'seg_areas': array([12328.567], dtype=float32),
 42 |         # 'img_id': 565198, 'image': '/home/tungi/FSOD/data/coco/images/val2014/COCO_val2014_000000565198.jpg', 
 43 |         # 'max_classes': array([79]), 'max_overlaps': array([1.], dtype=float32), 'need_crop': 0}
 44 | 
 45 |         # name_to_coco_cls_ind = {'person': 1, 'bicycle': 2, 'car': 3, 'motorcycle': 4, 'airplane': 5, 'bus': 6, 'train': 7,
 46 |         #  	'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11, 'stop sign': 13, 'parking meter': 14, 'bench': 15,
 47 |         # 	'bird': 16, 'cat': 17, 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24,
 48 |         # 	'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 'frisbee': 34, 'skis': 35,
 49 |         # 	'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39, 'baseball glove': 40, 'skateboard': 41, 'surfboard': 42,
 50 |         # 	'tennis racket': 43, 'bottle': 44, 'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 
 51 |         # 	'banana': 52, 'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58, 'pizza': 59, 
 52 |         # 	'donut': 60, 'cake': 61, 'chair': 62, 'couch': 63, 'potted plant': 64, 'bed': 65, 'dining table': 67, 'toilet': 70, 'tv': 72,
 53 |         # 	'laptop': 73, 'mouse': 74, 'remote': 75, 'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 
 54 |         # 	'sink': 81, 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88, 'hair drier': 89, 'toothbrush': 90}
 55 |         #############################################################################
 56 | 
 57 |         self.support_im_size = 320
 58 |         self.testing_shot = num_shot
 59 | 
 60 |         self.support_pool = [[] for i in range(self._num_classes)]
 61 |         self._label_to_cls_name = dict(list(zip(list(range(self._num_classes)), self._imdb.classes)))
 62 |         for _label in range(1, self._num_classes):
 63 |             cls_name = self._label_to_cls_name[_label]
 64 |             cls_dir = os.path.join(support_dir, cls_name)
 65 |             support_im_paths = [str(_p) for _p in list(Path(cls_dir).glob('*.jpg'))]
 66 |             if len(support_im_paths) == 0:
 67 |                 raise Exception(f'support data not found in {cls_dir}')
 68 |             random.seed(epi_random_seed)  # fix the shots
 69 |             support_im_paths = random.sample(support_im_paths, k=self.testing_shot)
 70 |             self.support_pool[_label].extend(support_im_paths)
 71 | 
 72 | 
 73 |     def __getitem__(self, index):
 74 |         # testing
 75 |         index_ratio = index
 76 |         # though it is called minibatch, in fact it contains only one img here
 77 |         minibatch_db = [self._roidb[index_ratio]]
 78 | 
 79 |         # load query
 80 |         blobs = get_minibatch(minibatch_db)
 81 |         data = torch.from_numpy(blobs['data'])
 82 |         im_info = torch.from_numpy(blobs['im_info'])  # (H, W, scale)
 83 |         data_height, data_width = data.size(1), data.size(2)
 84 |         data = data.permute(0, 3, 1, 2).contiguous().view(3, data_height, data_width)
 85 |         im_info = im_info.view(3)
 86 |         gt_boxes = torch.from_numpy(blobs['gt_boxes'])
 87 |         num_boxes = gt_boxes.size(0)
 88 |         all_cls_in_im = []
 89 |         for i in range(num_boxes):
 90 |             _cls = int(gt_boxes[i, 4])
 91 |             all_cls_in_im.append(_cls)
 92 |         all_cls_in_im = list(set(all_cls_in_im))
 93 |         if len(all_cls_in_im) > self.num_way:
 94 |             random.seed(self.epi_random_seed)  # fix 
 95 |             selected_ways = random.sample(all_cls_in_im, k=self.num_way)
 96 |         else:
 97 |             other_cls = list(range(self._num_classes))
 98 |             other_cls.remove(0)
 99 |             for _cls_ind in all_cls_in_im:
100 |                 other_cls.remove(_cls_ind)
101 |             random.seed(self.epi_random_seed)  # fix 
102 |             random_neg_cls = random.sample(other_cls, k=(self.num_way - len(all_cls_in_im)))
103 |             selected_ways = all_cls_in_im
104 |             selected_ways.extend(random_neg_cls)
105 | 
106 |         # get supports
107 |         support_data_all = np.zeros((self.testing_shot * self.num_way, 3, self.support_im_size, self.support_im_size), dtype=np.float32)
108 | 
109 |         for n in range(self.num_way):
110 |             selected_supports = self.support_pool[selected_ways[n]]
111 |             
112 |             for i, _path in enumerate(selected_supports):
113 |                 support_im = imread(_path)[:,:,::-1]  # rgb -> bgr
114 |                 target_size = np.min(support_im.shape[0:2])  # don't change the size
115 |                 support_im, _ = prep_im_for_blob(support_im, cfg.PIXEL_MEANS, target_size, cfg.TRAIN.MAX_SIZE)
116 |                 _h, _w = support_im.shape[0], support_im.shape[1]
117 |                 if _h > _w:
118 |                     resize_scale = float(self.support_im_size) / float(_h)
119 |                     unfit_size = int(_w * resize_scale)
120 |                     support_im = cv2.resize(support_im, (unfit_size, self.support_im_size), interpolation=cv2.INTER_LINEAR)
121 |                 else:
122 |                     resize_scale = float(self.support_im_size) / float(_w)
123 |                     unfit_size = int(_h * resize_scale)
124 |                     support_im = cv2.resize(support_im, (self.support_im_size, unfit_size), interpolation=cv2.INTER_LINEAR)
125 |                 h, w = support_im.shape[0], support_im.shape[1]
126 |                 support_data_all[self.testing_shot*n+i, :, :h, :w] = np.transpose(support_im, (2, 0, 1)) 
127 |             supports = torch.from_numpy(support_data_all)
128 | 
129 | 
130 |         return data, im_info, gt_boxes, num_boxes, supports, selected_ways
131 | 
132 |     def __len__(self):
133 |         return len(self._roidb)


--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import argparse
  4 | import time
  5 | import pickle
  6 | import cv2
  7 | import sys
  8 | import torch
  9 | import torch.nn as nn
 10 | from torch.autograd import Variable
 11 | from tqdm import tqdm
 12 | from matplotlib import pyplot as plt
 13 | from roi_data_layer.roidb import combined_roidb
 14 | from roi_data_layer.inference_loader import InferenceLoader
 15 | from roi_data_layer.general_test_loader import GeneralTestLoader
 16 | from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
 17 | from model.rpn.bbox_transform import clip_boxes
 18 | from model.roi_layers import nms
 19 | from model.rpn.bbox_transform import bbox_transform_inv
 20 | from model.utils.net_utils import save_net, load_net, vis_detections
 21 | from model.utils.fsod_logger import FSODInferenceLogger
 22 | from utils import *
 23 | 
 24 | 
 25 | if __name__ == '__main__':
 26 | 
 27 |     args = parse_args()
 28 |     print(args)
 29 |     cfg_from_file(args.cfg_file)
 30 |     cfg_from_list(args.set_cfgs)
 31 | 
 32 |     # prepare roidb
 33 |     cfg.TRAIN.USE_FLIPPED = False
 34 |     imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
 35 |     CWD = os.getcwd()
 36 |     support_dir = os.path.join(CWD, 'data/supports', args.sup_dir)
 37 | 
 38 |     # load dir
 39 |     input_dir = os.path.join(args.load_dir, "train/checkpoints")
 40 |     if not os.path.exists(input_dir):
 41 |         raise Exception('There is no input directory for loading network from ' + input_dir)
 42 |     load_name = os.path.join(input_dir,
 43 |         'model_{}_{}.pth'.format(args.checkepoch, args.checkpoint))
 44 | 
 45 |     # initilize the network
 46 |     classes = ['fg', 'bg']
 47 |     model = get_model(args.net, pretrained=False, way=args.way, shot=args.shot, classes=classes)
 48 |     print("load checkpoint %s" % (load_name))
 49 |     checkpoint = torch.load(load_name)
 50 |     model.load_state_dict(checkpoint['model'])
 51 |     if args.mGPUs:
 52 |         model = model.module
 53 |     if 'pooling_mode' in checkpoint.keys():
 54 |         cfg.POOLING_MODE = checkpoint['pooling_mode']
 55 |     print('load model successfully!')
 56 |     cfg.CUDA = True
 57 |     model.cuda()
 58 |     model.eval()
 59 | 
 60 |     # initilize the tensor holders
 61 |     holders = prepare_var(support=True)
 62 |     im_data = holders[0]
 63 |     im_info = holders[1]
 64 |     num_boxes = holders[2]
 65 |     gt_boxes = holders[3]
 66 |     support_ims = holders[4]
 67 | 
 68 |     # prepare holder for predicted boxes
 69 |     start = time.time()
 70 |     max_per_image = 100
 71 |     thresh = 0.05
 72 |     num_images = len(imdb.image_index)
 73 |     all_boxes = [[[] for _ in range(num_images)]
 74 |                 for _ in range(imdb.num_classes)]
 75 |     _t = {'im_detect': time.time(), 'misc': time.time()}
 76 | 
 77 |     model.eval()
 78 |     empty_array = np.transpose(np.array([[],[],[],[],[]]), (1,0))
 79 | 
 80 |     imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
 81 |     imdb.competition_mode(on=True)
 82 |     dataset = InferenceLoader(0, imdb, roidb, ratio_list, ratio_index, support_dir, 
 83 |                             1, len(imdb._classes), num_shot=args.shot, training=False, normalize=False)
 84 | 
 85 |     dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)
 86 |     data_iter = iter(dataloader)
 87 | 
 88 |     for i in tqdm(range(num_images)):
 89 |         data = next(data_iter)
 90 |         with torch.no_grad():
 91 |             im_data.resize_(data[0].size()).copy_(data[0])
 92 |             im_info.resize_(data[1].size()).copy_(data[1])
 93 |             gt_boxes.resize_(data[2].size()).copy_(data[2])
 94 |             num_boxes.resize_(data[3].size()).copy_(data[3])
 95 |             support_ims.resize_(data[4].size()).copy_(data[4])
 96 | 
 97 | 
 98 |         det_tic = time.time()
 99 |         with torch.no_grad():
100 |             rois, cls_prob, bbox_pred, \
101 |             rpn_loss_cls, rpn_loss_box, \
102 |             RCNN_loss_cls, RCNN_loss_bbox, \
103 |             rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims)
104 |         det_toc = time.time()
105 |         detect_time = det_toc - det_tic
106 |         misc_tic = time.time()
107 | 
108 |         scores = cls_prob.data
109 |         boxes = rois.data[:, :, 1:5]
110 | 
111 |         # Apply bounding-box regression deltas
112 |         box_deltas = bbox_pred.data
113 |         if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
114 |         # Optionally normalize targets by a precomputed mean and stdev
115 | 
116 |             box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
117 |                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
118 |             box_deltas = box_deltas.view(1, -1, 4)
119 | 
120 | 
121 |         pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
122 |         pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
123 | 
124 |         # re-scale boxes to the origin img scale
125 |         pred_boxes /= data[1][0][2].item()
126 | 
127 |         scores = scores.squeeze()
128 |         pred_boxes = pred_boxes.squeeze()
129 |         
130 | 
131 |         for j in range(1, imdb.num_classes):
132 |             if j != gt_boxes[0, 0, 4]:
133 |                 all_boxes[j][i] = empty_array
134 |                 continue
135 |             inds = torch.nonzero(scores[:,1]>thresh).view(-1)
136 |             if inds.numel() > 0:
137 |                 cls_scores = scores[:,1][inds]
138 |                 cls_boxes = pred_boxes[inds, :]
139 |                 cls_dets = NMS(cls_boxes, cls_scores)
140 |                 all_boxes[j][i] = cls_dets.cpu().numpy()
141 |             else:
142 |                 all_boxes[j][i] = empty_array
143 | 
144 |         misc_toc = time.time()
145 |         nms_time = misc_toc - misc_tic
146 | 
147 |         # if args.imlog:
148 |         #     origin_im = im_data[0].permute(1, 2, 0).contiguous().cpu().numpy()[:, :, ::-1]
149 |         #     origin_im = origin_im - origin_im.min()
150 |         #     origin_im /= origin_im.max()
151 |         #     gt_im = origin_im.copy()
152 |         #     pt_im = origin_im.copy()
153 |         #     np_gt_boxes = gt_boxes[0]
154 |         #     for n in range(np_gt_boxes.shape[0]):
155 |         #         box = np_gt_boxes[n].clone()
156 |         #         cv2.rectangle(gt_im, (box[0], box[1]), (box[2], box[3]), (0.1, 1, 0.1), 2)
157 |         #     plt.imshow(gt_im)
158 |         #     plt.show()
159 |         #     sup_im = support_ims[0][0].permute(1, 2, 0).contiguous().cpu().numpy()[:, :, ::-1]
160 |         #     sup_im = sup_im - sup_im.min()
161 |         #     sup_im /= sup_im.max()
162 |         #     plt.imshow(sup_im)
163 |         #     plt.show()
164 |         #     raise Exception(' ')
165 | 
166 |             # raise Exception(' ')
167 |             # cv2.rectangle(im, (box[0], box[1]), (box[0] + box[2], box[1] + box[3]), (20, 255, 20), 2)
168 |             # tb_logger.write(i, gt, support_ims, predict, save_im=True)
169 | 
170 |     sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s   \r' \
171 |             .format(i + 1, num_images, detect_time, nms_time))
172 |     sys.stdout.flush()
173 | 
174 |     output_dir = os.path.join(CWD, 'inference_output', args.eval_dir)
175 |     if not os.path.exists(output_dir):
176 |         os.makedirs(output_dir)
177 |     det_file = os.path.join(output_dir, 'detections.pkl')
178 |     with open(det_file, 'wb') as f:
179 |         pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
180 |     print('Evaluating detections')
181 |     imdb.evaluate_detections(all_boxes, output_dir)
182 | 


--------------------------------------------------------------------------------
/lib/datasets/voc_eval.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | from __future__ import absolute_import
  7 | from __future__ import division
  8 | from __future__ import print_function
  9 | 
 10 | import xml.etree.ElementTree as ET
 11 | import os
 12 | import pickle
 13 | import numpy as np
 14 | 
 15 | def parse_rec(filename):
 16 |   """ Parse a PASCAL VOC xml file """
 17 |   tree = ET.parse(filename)
 18 |   objects = []
 19 |   for obj in tree.findall('object'):
 20 |     obj_struct = {}
 21 |     obj_struct['name'] = obj.find('name').text
 22 |     obj_struct['pose'] = obj.find('pose').text
 23 |     obj_struct['truncated'] = int(obj.find('truncated').text)
 24 |     obj_struct['difficult'] = int(obj.find('difficult').text)
 25 |     bbox = obj.find('bndbox')
 26 |     obj_struct['bbox'] = [int(bbox.find('xmin').text),
 27 |                           int(bbox.find('ymin').text),
 28 |                           int(bbox.find('xmax').text),
 29 |                           int(bbox.find('ymax').text)]
 30 |     objects.append(obj_struct)
 31 | 
 32 |   return objects
 33 | 
 34 | 
 35 | def voc_ap(rec, prec, use_07_metric=False):
 36 |   """ ap = voc_ap(rec, prec, [use_07_metric])
 37 |   Compute VOC AP given precision and recall.
 38 |   If use_07_metric is true, uses the
 39 |   VOC 07 11 point method (default:False).
 40 |   """
 41 |   if use_07_metric:
 42 |     # 11 point metric
 43 |     ap = 0.
 44 |     for t in np.arange(0., 1.1, 0.1):
 45 |       if np.sum(rec >= t) == 0:
 46 |         p = 0
 47 |       else:
 48 |         p = np.max(prec[rec >= t])
 49 |       ap = ap + p / 11.
 50 |   else:
 51 |     # correct AP calculation
 52 |     # first append sentinel values at the end
 53 |     mrec = np.concatenate(([0.], rec, [1.]))
 54 |     mpre = np.concatenate(([0.], prec, [0.]))
 55 | 
 56 |     # compute the precision envelope
 57 |     for i in range(mpre.size - 1, 0, -1):
 58 |       mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 59 | 
 60 |     # to calculate area under PR curve, look for points
 61 |     # where X axis (recall) changes value
 62 |     i = np.where(mrec[1:] != mrec[:-1])[0]
 63 | 
 64 |     # and sum (\Delta recall) * prec
 65 |     ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 66 |   return ap
 67 | 
 68 | 
 69 | def voc_eval(detpath,
 70 |              annopath,
 71 |              imagesetfile,
 72 |              classname,
 73 |              cachedir,
 74 |              ovthresh=0.5,
 75 |              use_07_metric=False):
 76 |   """rec, prec, ap = voc_eval(detpath,
 77 |                               annopath,
 78 |                               imagesetfile,
 79 |                               classname,
 80 |                               [ovthresh],
 81 |                               [use_07_metric])
 82 | 
 83 |   Top level function that does the PASCAL VOC evaluation.
 84 | 
 85 |   detpath: Path to detections
 86 |       detpath.format(classname) should produce the detection results file.
 87 |   annopath: Path to annotations
 88 |       annopath.format(imagename) should be the xml annotations file.
 89 |   imagesetfile: Text file containing the list of images, one image per line.
 90 |   classname: Category name (duh)
 91 |   cachedir: Directory for caching the annotations
 92 |   [ovthresh]: Overlap threshold (default = 0.5)
 93 |   [use_07_metric]: Whether to use VOC07's 11 point AP computation
 94 |       (default False)
 95 |   """
 96 |   # assumes detections are in detpath.format(classname)
 97 |   # assumes annotations are in annopath.format(imagename)
 98 |   # assumes imagesetfile is a text file with each line an image name
 99 |   # cachedir caches the annotations in a pickle file
100 | 
101 |   # first load gt
102 |   if not os.path.isdir(cachedir):
103 |     os.mkdir(cachedir)
104 |   cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile)
105 |   # read list of images
106 |   with open(imagesetfile, 'r') as f:
107 |     lines = f.readlines()
108 |   imagenames = [x.strip() for x in lines]
109 | 
110 |   if not os.path.isfile(cachefile):
111 |     # load annotations
112 |     recs = {}
113 |     for i, imagename in enumerate(imagenames):
114 |       recs[imagename] = parse_rec(annopath.format(imagename))
115 |       if i % 100 == 0:
116 |         print('Reading annotation for {:d}/{:d}'.format(
117 |           i + 1, len(imagenames)))
118 |     # save
119 |     print('Saving cached annotations to {:s}'.format(cachefile))
120 |     with open(cachefile, 'wb') as f:
121 |       pickle.dump(recs, f)
122 |   else:
123 |     # load
124 |     with open(cachefile, 'rb') as f:
125 |       try:
126 |         recs = pickle.load(f)
127 |       except:
128 |         recs = pickle.load(f, encoding='bytes')
129 | 
130 |   # extract gt objects for this class
131 |   class_recs = {}
132 |   npos = 0
133 |   for imagename in imagenames:
134 |     R = [obj for obj in recs[imagename] if obj['name'] == classname]
135 |     bbox = np.array([x['bbox'] for x in R])
136 |     difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
137 |     det = [False] * len(R)
138 |     npos = npos + sum(~difficult)
139 |     class_recs[imagename] = {'bbox': bbox,
140 |                              'difficult': difficult,
141 |                              'det': det}
142 | 
143 |   # read dets
144 |   detfile = detpath.format(classname)
145 |   with open(detfile, 'r') as f:
146 |     lines = f.readlines()
147 | 
148 |   splitlines = [x.strip().split(' ') for x in lines]
149 |   image_ids = [x[0] for x in splitlines]
150 |   confidence = np.array([float(x[1]) for x in splitlines])
151 |   BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
152 | 
153 |   nd = len(image_ids)
154 |   tp = np.zeros(nd)
155 |   fp = np.zeros(nd)
156 | 
157 |   if BB.shape[0] > 0:
158 |     # sort by confidence
159 |     sorted_ind = np.argsort(-confidence)
160 |     sorted_scores = np.sort(-confidence)
161 |     BB = BB[sorted_ind, :]
162 |     image_ids = [image_ids[x] for x in sorted_ind]
163 | 
164 |     # go down dets and mark TPs and FPs
165 |     for d in range(nd):
166 |       R = class_recs[image_ids[d]]
167 |       bb = BB[d, :].astype(float)
168 |       ovmax = -np.inf
169 |       BBGT = R['bbox'].astype(float)
170 | 
171 |       if BBGT.size > 0:
172 |         # compute overlaps
173 |         # intersection
174 |         ixmin = np.maximum(BBGT[:, 0], bb[0])
175 |         iymin = np.maximum(BBGT[:, 1], bb[1])
176 |         ixmax = np.minimum(BBGT[:, 2], bb[2])
177 |         iymax = np.minimum(BBGT[:, 3], bb[3])
178 |         iw = np.maximum(ixmax - ixmin + 1., 0.)
179 |         ih = np.maximum(iymax - iymin + 1., 0.)
180 |         inters = iw * ih
181 | 
182 |         # union
183 |         uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
184 |                (BBGT[:, 2] - BBGT[:, 0] + 1.) *
185 |                (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
186 | 
187 |         overlaps = inters / uni
188 |         ovmax = np.max(overlaps)
189 |         jmax = np.argmax(overlaps)
190 | 
191 |       if ovmax > ovthresh:
192 |         if not R['difficult'][jmax]:
193 |           if not R['det'][jmax]:
194 |             tp[d] = 1.
195 |             R['det'][jmax] = 1
196 |           else:
197 |             fp[d] = 1.
198 |       else:
199 |         fp[d] = 1.
200 | 
201 |   # compute precision recall
202 |   fp = np.cumsum(fp)
203 |   tp = np.cumsum(tp)
204 |   rec = tp / float(npos)
205 |   # avoid divide by zero in case the first detection matches a difficult
206 |   # ground truth
207 |   prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
208 |   ap = voc_ap(rec, prec, use_07_metric)
209 | 
210 |   return rec, prec, ap
211 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Dual-awareness Attention for Few-shot Object Detection
  2 | <!-- ![alt text](http://github.com/Tung-I/DAnA_FSOD/blob/main/attention_visualization.jpg?raw=true) -->
  3 | 
  4 | 
  5 | <!-- TABLE OF CONTENTS -->
  6 | <details open="open">
  7 |   <summary>Table of Contents</summary>
  8 |   <ol>
  9 |     <li>
 10 |       <a href="#introduction">Introduction</a>
 11 |     </li>
 12 |     <li>
 13 |       <a href="#getting-started">Getting Started</a>
 14 |       <ul>
 15 |         <li><a href="#prerequisites">Prerequisites</a></li>
 16 |         <li><a href="#data-preparation">Data Preparation</a></li>
 17 |         <li><a href="#pretrained-weights">Pretrained Weights</a></li>
 18 |         <li><a href="#installation">Installation</a></li>
 19 |       </ul>
 20 |     </li>
 21 |     <li><a href="#train">Train</a></li>
 22 |     <li><a href="#inference">Inference</a></li>
 23 |     <li><a href="#acknowledgements">Acknowledgements</a></li>
 24 |   </ol>
 25 | </details>
 26 | 
 27 | <!-- INTRODUCTION -->
 28 | ## Introduction
 29 | 
 30 | While recent progress has significantly boosted few-shot classification (FSC) performance, few-shot object detection (FSOD) remains challenging for modern learning systems.
Therefore, we propose the DAnA (Dual-awareness Attention) mechanism, which is adaptable to various existing object detection networks and enhances FSOD performance by paying adaptable attention to support images conditioned on given query information. The proposed method achieves SOTA results on the COCO benchmark, outperforming the strongest baseline by 47% in performance.\
 32 | paper link: https://arxiv.org/abs/2102.12152
 33 | 
 34 | <br />
 35 | <p align="center">
  <a href="https://github.com/Tung-I/Dual-awareness-Attention-for-Few-shot-Object-Detection">
 38 |     <img src="images/prediction.jpg" alt="prediction" width="1024" height="660">
 39 |   </a>
 40 | </p>
 41 | 
 42 | <!-- GETTING STARTED -->
 43 | ## Getting Started
 44 | ### Prerequisites
 45 | * Python 3.6
 46 | * Cuda 10.0 or 10.1
 47 | * Pytorch 1.2.0 or higher
 48 | 
 49 | ### Data Preparation
 50 | 1. First, clone the repository and create a data folder:
 51 | ```
 52 | cd Dual-awareness-Attention-for-Few-shot-Object-Detection && mkdir data
 53 | ```
2. Download the COCO dataset. Please follow the instructions in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models).
 55 | Create the symlinks to datasets.
 56 | ```
 57 | $ cd data
 58 | 
 59 | For VOC 2007
 60 | $ ln -s [your-path-to]/VOC2007/VOCdevkit VOCdevkit2007
 61 | 
 62 | For COCO
 63 | $ ln -s [your-path-to]/coco coco
 64 | ```
 65 | 
 66 | 3. The COCO dataset must be preprocessed to conform to the problem setting of FSOD. At training, we must remove the labels of novel instances in each query image. For testing, we should fix the target category of each query image to ensure the results are reproducible. For your convenience, we provide the preprocessed .json files of COCO for both training and testing. Users can process the COCO annotation to construct customized datasets for their research purposes as well.
 67 | * 60 base classes for training (https://drive.google.com/file/d/10mXvdpgSjFYML_9J-zMDLPuBYrSrG2ub/view?usp=sharing)
 68 | * 20 novel classes for testing (https://drive.google.com/file/d/1FZJhC-Ob-IXTKf5heNeNAN00V8OUJXi2/view?usp=sharing)
 69 | To use them, simply put the folder into *COCO annotations*.
 70 | ```
$ mv coco60_train [your-path-to]/coco/annotations/coco60_train
 72 | ```
 73 | For those who want to apply customized annotations, please refer to lib/datasets/factory.py and lib/datasets/coco_split.py.
 74 | 
 75 | 4. At training, the support images are image patches randomly cropped from other query images according to box annotations. At testing, to ensure the results are reproducible, a set of support images of 80 categories should be constructed in advance. The support image set we used is available [here](https://drive.google.com/file/d/1nl9-DEpBBJ5w6hxVdijY6hFxoQdz8aso/view?usp=sharing). To use them:
 76 | ```
 77 | Create the soft link of support imgs 
 78 | $ ln -s /your/path/to/supports supports
 79 | ```
 80 | 5. Create the folder to save model weights
 81 | ```
 82 | $ mkdir models
 83 | ```
 84 | 
 85 | ### Pretrained Weights
 86 | Please download the pretrained backbone models (e.g., res50, vgg16) and put them into data/pretrained_model, which can be found in [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn#beyond-the-demo-installation-for-training-and-testing-models). 
 87 | ```
 88 | $ mkdir data/pretrained_model && cd data/pretrained_model
 89 | $ ln -s /your/path/to/res50.pth res50.pth
 90 | ```
**NOTE**. We suggest using Caffe pretrained models to reproduce our results.
 92 | **If you want to use pytorch pre-trained models, please remember to transpose images from BGR to RGB, and also use the same data transformer (minus mean and normalize) as used in pretrained model.**
 93 | 
For those who would like to test the model only, the weights of DAnA can be downloaded [here](https://drive.google.com/file/d/1JaYF-Ep-C6b5X01_e9tFRzFgRXMJQYQ7/view?usp=sharing). **NOTE**. The provided fine-tuned model weights "cisa_coco_ft30" were fine-tuned on 30-shot novel object classes without using the BA block. Therefore, to use them, please set get_model(..., use_BA_block=False) in train.py.
 95 | ```
 96 | $ cd models
 97 | $ ln -s [your-path-to]/cisa_coco_ft30 cisa_coco_ft30
 98 | ```
 99 | 
100 | ### Installation
101 | Install the conda environment.
102 | ```
103 | $ conda env create -f env.yml
104 | $ source activate [NAME_OF_THE_ENV]
105 | ```
106 | Compile COCO API.
107 | ```
108 | $ cd lib
109 | $ git clone https://github.com/pdollar/coco.git 
110 | $ cd coco/PythonAPI
111 | $ make && make install
112 | put pycocotools under data/
113 | $ mv cocoapi/PythonAPI/pycocotools .
114 | ```
115 | Compile the cuda dependencies using following commands.
116 | ```
117 | $ cd lib
118 | $ python setup.py build develop
119 | ```
If you encounter errors during compilation, you may have forgotten to export the CUDA paths to your environment.
121 | 
122 | ## Train
123 | 
124 | 
125 | ***To train from scratch***
126 | ```
127 | $ python train.py --dataset coco_base --flip --net DAnA --lr 0.001 --lr_decay_step 12 --bs 4 --epochs 16 --disp_interval 20 --save_dir models/DAnA --way 2 --shot 3 
128 | ```
129 | 
130 | ***To resume***
131 | ```
132 | $ python train.py --dataset coco_base --flip --net DAnA --lr 0.001 --lr_decay_step 12 --bs 4 --epochs 16 --disp_interval 20 --save_dir models/DAnA --way 2 --shot 3 --r --load_dir models/DAnA --checkepoch 12 --checkpoint 4307
133 | ```
134 | 
135 | <!-- ***To fine-tune***
136 | <br>
137 | The same as continuing training. You can simply replace the dataset with fine-tuning dataset prepared beforehand and select a smaller learning rate like 0.0001. 
138 | 
139 | The json files of the fine-tuning datasets comprised of 20 novel COCO categories can be found here: 
140 | <br>
141 | [10 shots](https://drive.google.com/file/d/1eUZpc6KpSouZm8QL5s2EHXi4sXJXn89X/view?usp=sharing)
142 | <br>
143 | [30 shots](https://drive.google.com/file/d/1zCj4Sbhd2FjHVxlmlk4Nfb9qqJcqMHRy/view?usp=sharing) -->
144 | 
145 | ## Inference
146 | ```
147 | $ python inference.py --eval --dataset val2014_novel --net DAnA --r --load_dir models/DAnA_coco_ft30 --checkepoch 4 --checkpoint 299 --bs 1 --shot 3 --eval_dir dana
148 | ```
149 | 
150 | ## Attention Visualization
151 | <br />
152 | <p align="center">
  <a href="https://github.com/Tung-I/Dual-awareness-Attention-for-Few-shot-Object-Detection">
155 |     <img src="images/attention_visualization.jpg" alt="attention_visualization" width="1024" height="280">
156 |   </a>
157 | </p>
158 | 
159 | ## Acknowledgements
This work was supported in part by the Ministry of Science and Technology, Taiwan, under Grant MOST 110-2634-F-002-026. We benefit from NVIDIA DGX-1 AI Supercomputer and are grateful to the National Center for High-performance Computing. The code is mainly built on [faster-rcnn.pytorch](https://github.com/jwyang/faster-rcnn.pytorch/tree/pytorch-1.0).
161 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | import argparse
  5 | import time
  6 | import random
  7 | import cv2
  8 | import torch
  9 | import torch.nn as nn
 10 | import torch.optim as optim
 11 | from torch.autograd import Variable
 12 | from tqdm import tqdm
 13 | 
 14 | from roi_data_layer.roidb import combined_roidb
 15 | from roi_data_layer.fs_loader import FewShotLoader, sampler
 16 | from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
 17 | from model.utils.net_utils import weights_normal_init, save_net, load_net, \
 18 |       adjust_learning_rate, save_checkpoint, clip_gradient
 19 | from model.utils.fsod_logger import FSODLogger
 20 | 
 21 | from utils import *
 22 | 
 23 | 
 24 | if __name__ == '__main__':
 25 | 
 26 |     args = parse_args()
 27 |     print(args)
 28 | 
 29 |     cfg_from_file(args.cfg_file)
 30 |     cfg_from_list(args.set_cfgs)
 31 | 
 32 |     # make results determinable
 33 |     random_seed = 1996
 34 |     np.random.seed(random_seed)
 35 |     random.seed(random_seed)
 36 |     torch.manual_seed(random_seed)
 37 |     torch.cuda.manual_seed_all(random_seed)
 38 |     torch.backends.cudnn.deterministic = True
 39 |     torch.backends.cudnn.benchmark = False
 40 |     cfg.CUDA = True
 41 | 
 42 |     # prepare output dir
 43 |     output_dir = os.path.join(args.save_dir, "train/checkpoints") 
 44 |     if not os.path.exists(output_dir):
 45 |         os.makedirs(output_dir)
 46 | 
 47 |     # prepare dataloader
 48 |     cfg.TRAIN.USE_FLIPPED = args.use_flip
 49 |     cfg.USE_GPU_NMS = True
 50 |     imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
 51 | 
 52 | 
 53 |     dataset = FewShotLoader(roidb, ratio_list, ratio_index, args.batch_size, \
 54 |                             imdb.num_classes, training=True, num_way=args.way, num_shot=args.shot)
 55 |     train_size = len(roidb)
 56 |     print('{:d} roidb entries'.format(len(roidb)))
 57 |     sampler_batch = sampler(train_size, args.batch_size)
 58 |     dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
 59 |                             sampler=sampler_batch, num_workers=args.num_workers)
 60 | 
 61 |     # initilize the tensor holders
 62 |     holders = prepare_var(support=True)
 63 |     im_data = holders[0]
 64 |     im_info = holders[1]
 65 |     num_boxes = holders[2]
 66 |     gt_boxes = holders[3]
 67 |     support_ims = holders[4]
 68 | 
 69 |     # initilize the network
 70 |     pre_weight = False if args.resume else True
 71 |     classes = ['fg', 'bg']
 72 |     model = get_model(args.net, pretrained=pre_weight, way=args.way, shot=args.shot, classes=classes)
 73 |     model.cuda()
 74 | 
 75 |     # optimizer
 76 |     lr = cfg.TRAIN.LEARNING_RATE
 77 |     lr = args.lr
 78 |     params = []
 79 |     for key, value in dict(model.named_parameters()).items():
 80 |         if value.requires_grad:
 81 |             if 'bias' in key:
 82 |                 params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \
 83 |                         'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
 84 |             else:
 85 |                 params += [{'params':[value],'lr':lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]
 86 |     if args.optimizer == "adam":
 87 |         optimizer = torch.optim.Adam(params)
 88 |     elif args.optimizer == "sgd":
 89 |         optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)
 90 | 
 91 |     # load checkpoints 
 92 |     if args.resume:
 93 |         load_dir = os.path.join(args.load_dir, "train/checkpoints")
 94 |         load_name = os.path.join(load_dir, f'model_{args.checkepoch}_{args.checkpoint}.pth')
 95 |         checkpoint = torch.load(load_name)
 96 |         args.start_epoch = checkpoint['epoch']
 97 |         model.load_state_dict(checkpoint['model'])
 98 |         optimizer.load_state_dict(checkpoint['optimizer'])
 99 |         lr = optimizer.param_groups[0]['lr']
100 |         if 'pooling_mode' in checkpoint.keys():
101 |             cfg.POOLING_MODE = checkpoint['pooling_mode']
102 |         print(f'loaded checkpoint: {load_name}')
103 | 
104 |     if args.mGPUs:
105 |         model = nn.DataParallel(model)
106 | 
107 |     # initialize logger
108 |     if not args.dlog:
109 |         logger_save_dir = os.path.join(args.save_dir, "train")
110 |         tb_logger = FSODLogger(logger_save_dir)
111 | 
112 |     # training
113 |     iters_per_epoch = int(train_size / args.batch_size)
114 |     for epoch in range(args.start_epoch, args.max_epochs + 1):
115 |         model.train()
116 |         loss_temp = 0
117 |         start_time = time.time()
118 |         if epoch % (args.lr_decay_step + 1) == 0:
119 |             adjust_learning_rate(optimizer, args.lr_decay_gamma)
120 |             lr *= args.lr_decay_gamma
121 |         data_iter = iter(dataloader)
122 |         for step in range(iters_per_epoch):
123 |             data = next(data_iter)
124 |             with torch.no_grad():
125 |                 im_data.resize_(data[0].size()).copy_(data[0])
126 |                 im_info.resize_(data[1].size()).copy_(data[1])
127 |                 gt_boxes.resize_(data[2].size()).copy_(data[2])
128 |                 num_boxes.resize_(data[3].size()).copy_(data[3])
129 |                 support_ims.resize_(data[4].size()).copy_(data[4])
130 | 
131 |             model.zero_grad()
132 | 
133 |             rois, cls_prob, bbox_pred, \
134 |             rpn_loss_cls, rpn_loss_box, \
135 |             RCNN_loss_cls, RCNN_loss_bbox, \
136 |             rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims)
137 |    
138 |             loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
139 |                 + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
140 |             loss_temp += loss.item()
141 | 
142 |             optimizer.zero_grad()
143 |             loss.backward()
144 |             optimizer.step()
145 | 
146 |             if step % args.disp_interval == 0:
147 |                 end_time = time.time()
148 |                 if step > 0:
149 |                     loss_temp /= (args.disp_interval + 1)
150 |                 if args.mGPUs:
151 |                     loss_rpn_cls = rpn_loss_cls.mean().item()
152 |                     loss_rpn_box = rpn_loss_box.mean().item()
153 |                     loss_rcnn_cls = RCNN_loss_cls.mean().item()
154 |                     loss_rcnn_box = RCNN_loss_bbox.mean().item()
155 |                     fg_cnt = torch.sum(rois_label.data.ne(0))
156 |                     bg_cnt = rois_label.data.numel() - fg_cnt
157 |                 else:
158 |                     loss_rpn_cls = rpn_loss_cls.item()
159 |                     loss_rpn_box = rpn_loss_box.item()
160 |                     loss_rcnn_cls = RCNN_loss_cls.item()
161 |                     loss_rcnn_box = RCNN_loss_bbox.item()
162 |                     fg_cnt = torch.sum(rois_label.data.ne(0))
163 |                     bg_cnt = rois_label.data.numel() - fg_cnt
164 | 
165 |                 print("[epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
166 |                                         % (epoch, step, iters_per_epoch, loss_temp, lr))
167 |                 print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end_time-start_time))
168 |                 print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
169 |                             % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))
170 | 
171 |                 info = {
172 |                 'loss': loss_temp,
173 |                 'loss_rpn_cls': loss_rpn_cls,
174 |                 'loss_rpn_box': loss_rpn_box,
175 |                 'loss_rcnn_cls': loss_rcnn_cls,
176 |                 'loss_rcnn_box': loss_rcnn_box
177 |                 }
178 |                 loss_temp = 0
179 |                 start_time = time.time()
180 |         if not args.dlog:
181 |             tb_logger.write(epoch, info, save_im=args.imlog)
182 | 
183 |         save_name = os.path.join(output_dir, 'model_{}_{}.pth'.format(epoch, step))
184 |         save_checkpoint({
185 |             'epoch': epoch + 1,
186 |             'model': model.module.state_dict() if args.mGPUs else model.state_dict(),
187 |             'optimizer': optimizer.state_dict(),
188 |             'pooling_mode': cfg.POOLING_MODE,
189 |         }, save_name)
190 |         print('save model: {}'.format(save_name))
191 | 
192 | 
193 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | #include <omp.h>
  4 | 
  5 | 
  6 | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
  7 |                      const int height, const int width, const int channels,
  8 |                      const int aligned_height, const int aligned_width, const float * bottom_rois,
  9 |                      float* top_data);
 10 | 
 11 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
 12 |                      const int height, const int width, const int channels,
 13 |                      const int aligned_height, const int aligned_width, const float * bottom_rois,
 14 |                      float* top_data);
 15 | 
 16 | int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
 17 |                      THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
 18 | {
 19 |     //Grab the input tensor
 20 |     float * data_flat = THFloatTensor_data(features);
 21 |     float * rois_flat = THFloatTensor_data(rois);
 22 | 
 23 |     float * output_flat = THFloatTensor_data(output);
 24 | 
 25 |     // Number of ROIs
 26 |     int num_rois = THFloatTensor_size(rois, 0);
 27 |     int size_rois = THFloatTensor_size(rois, 1);
 28 |     if (size_rois != 5)
 29 |     {
 30 |         return 0;
 31 |     }
 32 | 
 33 |     // data height
 34 |     int data_height = THFloatTensor_size(features, 2);
 35 |     // data width
 36 |     int data_width = THFloatTensor_size(features, 3);
 37 |     // Number of channels
 38 |     int num_channels = THFloatTensor_size(features, 1);
 39 | 
 40 |     // do ROIAlignForward
 41 |     ROIAlignForwardCpu(data_flat, spatial_scale, num_rois, data_height, data_width, num_channels,
 42 |             aligned_height, aligned_width, rois_flat, output_flat);
 43 | 
 44 |     return 1;
 45 | }
 46 | 
 47 | int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
 48 |                        THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
 49 | {
 50 |     //Grab the input tensor
 51 |     float * top_grad_flat = THFloatTensor_data(top_grad);
 52 |     float * rois_flat = THFloatTensor_data(rois);
 53 | 
 54 |     float * bottom_grad_flat = THFloatTensor_data(bottom_grad);
 55 | 
 56 |     // Number of ROIs
 57 |     int num_rois = THFloatTensor_size(rois, 0);
 58 |     int size_rois = THFloatTensor_size(rois, 1);
 59 |     if (size_rois != 5)
 60 |     {
 61 |         return 0;
 62 |     }
 63 | 
 64 |     // batch size
 65 |     // int batch_size = THFloatTensor_size(bottom_grad, 0);
 66 |     // data height
 67 |     int data_height = THFloatTensor_size(bottom_grad, 2);
 68 |     // data width
 69 |     int data_width = THFloatTensor_size(bottom_grad, 3);
 70 |     // Number of channels
 71 |     int num_channels = THFloatTensor_size(bottom_grad, 1);
 72 | 
 73 |     // do ROIAlignBackward
 74 |     ROIAlignBackwardCpu(top_grad_flat, spatial_scale, num_rois, data_height,
 75 |             data_width, num_channels, aligned_height, aligned_width, rois_flat, bottom_grad_flat);
 76 | 
 77 |     return 1;
 78 | }
 79 | 
 80 | void ROIAlignForwardCpu(const float* bottom_data, const float spatial_scale, const int num_rois,
 81 |                      const int height, const int width, const int channels,
 82 |                      const int aligned_height, const int aligned_width, const float * bottom_rois,
 83 |                      float* top_data)
 84 | {
 85 |     const int output_size = num_rois * aligned_height * aligned_width * channels;
 86 | 
 87 |     int idx = 0;
 88 |     for (idx = 0; idx < output_size; ++idx)
 89 |     {
 90 |         // (n, c, ph, pw) is an element in the aligned output
 91 |         int pw = idx % aligned_width;
 92 |         int ph = (idx / aligned_width) % aligned_height;
 93 |         int c = (idx / aligned_width / aligned_height) % channels;
 94 |         int n = idx / aligned_width / aligned_height / channels;
 95 | 
 96 |         float roi_batch_ind = bottom_rois[n * 5 + 0];
 97 |         float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
 98 |         float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
 99 |         float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
100 |         float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
101 | 
102 |         // Force malformed ROI to be 1x1
103 |         float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
104 |         float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
105 |         float bin_size_h = roi_height / (aligned_height - 1.);
106 |         float bin_size_w = roi_width / (aligned_width - 1.);
107 | 
108 |         float h = (float)(ph) * bin_size_h + roi_start_h;
109 |         float w = (float)(pw) * bin_size_w + roi_start_w;
110 | 
111 |         int hstart = fminf(floor(h), height - 2);
112 |         int wstart = fminf(floor(w), width - 2);
113 | 
114 |         int img_start = roi_batch_ind * channels * height * width;
115 | 
116 |         // bilinear interpolation
117 |         if (h < 0 || h >= height || w < 0 || w >= width)
118 |         {
119 |             top_data[idx] = 0.;
120 |         }
121 |         else
122 |         {
123 |             float h_ratio = h - (float)(hstart);
124 |             float w_ratio = w - (float)(wstart);
125 |             int upleft = img_start + (c * height + hstart) * width + wstart;
126 |             int upright = upleft + 1;
127 |             int downleft = upleft + width;
128 |             int downright = downleft + 1;
129 | 
130 |             top_data[idx] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
131 |                 + bottom_data[upright] * (1. - h_ratio) * w_ratio
132 |                 + bottom_data[downleft] * h_ratio * (1. - w_ratio)
133 |                 + bottom_data[downright] * h_ratio * w_ratio;
134 |         }
135 |     }
136 | }
137 | 
138 | void ROIAlignBackwardCpu(const float* top_diff, const float spatial_scale, const int num_rois,
139 |                      const int height, const int width, const int channels,
140 |                      const int aligned_height, const int aligned_width, const float * bottom_rois,
141 |                      float* bottom_diff)
142 | {
143 |     const int output_size = num_rois * aligned_height * aligned_width * channels;
144 | 
145 |     int idx = 0;
146 |     for (idx = 0; idx < output_size; ++idx)
147 |     {
148 |         // (n, c, ph, pw) is an element in the aligned output
149 |         int pw = idx % aligned_width;
150 |         int ph = (idx / aligned_width) % aligned_height;
151 |         int c = (idx / aligned_width / aligned_height) % channels;
152 |         int n = idx / aligned_width / aligned_height / channels;
153 | 
154 |         float roi_batch_ind = bottom_rois[n * 5 + 0];
155 |         float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
156 |         float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
157 |         float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
158 |         float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
159 | 
160 |         // Force malformed ROI to be 1x1
161 |         float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
162 |         float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
163 |         float bin_size_h = roi_height / (aligned_height - 1.);
164 |         float bin_size_w = roi_width / (aligned_width - 1.);
165 | 
166 |         float h = (float)(ph) * bin_size_h + roi_start_h;
167 |         float w = (float)(pw) * bin_size_w + roi_start_w;
168 | 
169 |         int hstart = fminf(floor(h), height - 2);
170 |         int wstart = fminf(floor(w), width - 2);
171 | 
172 |         int img_start = roi_batch_ind * channels * height * width;
173 | 
174 |         // bilinear interpolation
175 |         if (h < 0 || h >= height || w < 0 || w >= width)
176 |         {
177 |             float h_ratio = h - (float)(hstart);
178 |             float w_ratio = w - (float)(wstart);
179 |             int upleft = img_start + (c * height + hstart) * width + wstart;
180 |             int upright = upleft + 1;
181 |             int downleft = upleft + width;
182 |             int downright = downleft + 1;
183 | 
184 |             bottom_diff[upleft] += top_diff[idx] * (1. - h_ratio) * (1. - w_ratio);
185 |             bottom_diff[upright] += top_diff[idx] * (1. - h_ratio) *  w_ratio;
186 |             bottom_diff[downleft] += top_diff[idx] * h_ratio * (1. - w_ratio);
187 |             bottom_diff[downright] += top_diff[idx] * h_ratio * w_ratio;
188 |         }
189 |     }
190 | }
191 | 


--------------------------------------------------------------------------------
/lib/model/roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "roi_align_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |             i += blockDim.x * gridDim.x)
 13 | 
 14 | 
 15 |     __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
 16 |                                     const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
 17 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 18 |             // (n, c, ph, pw) is an element in the aligned output
 19 |             // int n = index;
 20 |             // int pw = n % aligned_width;
 21 |             // n /= aligned_width;
 22 |             // int ph = n % aligned_height;
 23 |             // n /= aligned_height;
 24 |             // int c = n % channels;
 25 |             // n /= channels;
 26 | 
 27 |             int pw = index % aligned_width;
 28 |             int ph = (index / aligned_width) % aligned_height;
 29 |             int c  = (index / aligned_width / aligned_height) % channels;
 30 |             int n  = index / aligned_width / aligned_height / channels;
 31 | 
 32 |             // bottom_rois += n * 5;
 33 |             float roi_batch_ind = bottom_rois[n * 5 + 0];
 34 |             float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
 35 |             float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
 36 |             float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
 37 |             float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
 38 | 
 39 |             // Force malformed ROIs to be 1x1
 40 |             float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
 41 |             float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
 42 |             float bin_size_h = roi_height / (aligned_height - 1.);
 43 |             float bin_size_w = roi_width / (aligned_width - 1.);
 44 | 
 45 |             float h = (float)(ph) * bin_size_h + roi_start_h;
 46 |             float w = (float)(pw) * bin_size_w + roi_start_w;
 47 | 
 48 |             int hstart = fminf(floor(h), height - 2);
 49 |             int wstart = fminf(floor(w), width - 2);
 50 | 
 51 |             int img_start = roi_batch_ind * channels * height * width;
 52 | 
 53 |             // bilinear interpolation
 54 |             if (h < 0 || h >= height || w < 0 || w >= width) {
 55 |                 top_data[index] = 0.;
 56 |             } else {
 57 |                 float h_ratio = h - (float)(hstart);
 58 |                 float w_ratio = w - (float)(wstart);
 59 |                 int upleft = img_start + (c * height + hstart) * width + wstart;
 60 |                 int upright = upleft + 1;
 61 |                 int downleft = upleft + width;
 62 |                 int downright = downleft + 1;
 63 | 
 64 |                 top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
 65 |                     + bottom_data[upright] * (1. - h_ratio) * w_ratio
 66 |                     + bottom_data[downleft] * h_ratio * (1. - w_ratio)
 67 |                     + bottom_data[downright] * h_ratio * w_ratio;
 68 |             }
 69 |         }
 70 |     }
 71 | 
 72 | 
 73 |     int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
 74 |                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
 75 |         const int kThreadsPerBlock = 1024;
 76 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
 77 |         cudaError_t err;
 78 | 
 79 | 
 80 |         ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 81 |           output_size, bottom_data, spatial_scale, height, width, channels,
 82 |           aligned_height, aligned_width, bottom_rois, top_data);
 83 | 
 84 |         err = cudaGetLastError();
 85 |         if(cudaSuccess != err) {
 86 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
 87 |             exit( -1 );
 88 |         }
 89 | 
 90 |         return 1;
 91 |     }
 92 | 
 93 | 
 94 |     __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
 95 |                                      const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
 96 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 97 | 
 98 |             // (n, c, ph, pw) is an element in the aligned output
 99 |             int pw = index % aligned_width;
100 |             int ph = (index / aligned_width) % aligned_height;
101 |             int c  = (index / aligned_width / aligned_height) % channels;
102 |             int n  = index / aligned_width / aligned_height / channels;
103 | 
104 |             float roi_batch_ind = bottom_rois[n * 5 + 0];
105 |             float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
106 |             float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
107 |             float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
108 |             float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
109 |             /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */
110 |             /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */
111 |             /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */
112 |             /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */
113 | 
114 |             // Force malformed ROIs to be 1x1
115 |             float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
116 |             float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
117 |             float bin_size_h = roi_height / (aligned_height - 1.);
118 |             float bin_size_w = roi_width / (aligned_width - 1.);
119 | 
120 |             float h = (float)(ph) * bin_size_h + roi_start_h;
121 |             float w = (float)(pw) * bin_size_w + roi_start_w;
122 | 
123 |             int hstart = fminf(floor(h), height - 2);
124 |             int wstart = fminf(floor(w), width - 2);
125 | 
126 |             int img_start = roi_batch_ind * channels * height * width;
127 | 
128 |             // bilinear interpolation
129 |             if (!(h < 0 || h >= height || w < 0 || w >= width)) {
130 |                 float h_ratio = h - (float)(hstart);
131 |                 float w_ratio = w - (float)(wstart);
132 |                 int upleft = img_start + (c * height + hstart) * width + wstart;
133 |                 int upright = upleft + 1;
134 |                 int downleft = upleft + width;
135 |                 int downright = downleft + 1;
136 | 
137 |                 atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
138 |                 atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
139 |                 atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
140 |                 atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
141 |             }
142 |         }
143 |     }
144 | 
145 |     int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 |                                 const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 |         const int kThreadsPerBlock = 1024;
148 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
149 |         cudaError_t err;
150 | 
151 |         ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 |           output_size, top_diff, spatial_scale, height, width, channels,
153 |           aligned_height, aligned_width, bottom_diff, bottom_rois);
154 | 
155 |         err = cudaGetLastError();
156 |         if(cudaSuccess != err) {
157 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 |             exit( -1 );
159 |         }
160 | 
161 |         return 1;
162 |     }
163 | 
164 | 
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 | 


--------------------------------------------------------------------------------