├── models ├── __init__.py ├── mask_rcnn │ └── __init__.py ├── faster_rcnn │ ├── __init__.py │ ├── init.py │ ├── vgg_adver_expansion_cluster.py │ └── test_module.py ├── head.py └── losses.py ├── utils ├── __init__.py ├── distributed_utils.py ├── log_helper.py ├── lr_helper.py ├── load_helper.py ├── coco_eval.py ├── anchor_helper.py ├── bbox_helper.py ├── cal_mAP.py └── visualize_helper.py ├── datasets ├── __init__.py ├── pycocotools │ ├── __init__.py │ ├── Makefile │ ├── setup.py │ ├── common │ │ ├── maskApi.h │ │ └── gason.h │ └── mask.py ├── target_dataset.py ├── example_loader.py ├── coco_loader.py └── example_dataset.py ├── functions ├── __init__.py ├── predict_bbox.py ├── rpn_proposal.py ├── proposal_assign.py └── anchor_target.py ├── extensions ├── _nms │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── nms │ │ │ └── __init__.py │ ├── src │ │ ├── nms_cuda.h │ │ ├── nms.h │ │ ├── cuda │ │ │ ├── nms_kernel.h │ │ │ └── nms_kernel.cu │ │ ├── nms_cuda.c │ │ └── nms.c │ ├── build.sh │ ├── build.py │ └── pth_nms.py ├── _roi_align │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── roi_align │ │ │ └── __init__.py │ ├── functions │ │ ├── __init__.py │ │ └── roi_align.py │ ├── modules │ │ ├── __init__.py │ │ └── roi_align.py │ ├── build.sh │ ├── src │ │ ├── roi_align_cuda.h │ │ ├── roi_align_kernel.h │ │ ├── roi_align_cuda.c │ │ └── roi_align_kernel.cu │ └── build.py ├── _bbox_helper │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── bbox_helper │ │ │ └── __init__.py │ ├── src │ │ ├── bbox_helper.h │ │ ├── bbox_helper_cuda.h │ │ ├── bbox_helper.c │ │ ├── cuda │ │ │ ├── iou_overlap_kernel.h │ │ │ └── iou_overlap_kernel.cu │ │ └── bbox_helper_cuda.c │ ├── build.sh │ ├── bbox_helper.py │ └── build.py ├── _roi_pooling │ ├── __init__.py │ ├── _ext │ │ ├── __init__.py │ │ └── roi_pooling │ │ │ └── __init__.py │ ├── modules │ │ ├── __init__.py │ │ ├── roi_pool.py │ │ └── roi_pool_py.py │ ├── functions │ │ ├── __init__.py │ │ └── roi_pool.py │ ├── src │ │ ├── 
roi_pooling.h │ │ ├── roi_pooling_cuda.h │ │ ├── roi_pooling_kernel.h │ │ ├── roi_pooling_cuda.c │ │ └── roi_pooling.c │ ├── build.sh │ └── build.py ├── _focal_loss │ ├── _ext │ │ ├── __init__.py │ │ └── focal_loss │ │ │ └── __init__.py │ ├── build.sh │ ├── src │ │ ├── cuda │ │ │ ├── focal_loss_sigmoid_kernel.h │ │ │ ├── focal_loss_softmax_kernel.h │ │ │ ├── focal_loss_sigmoid_kernel.cu │ │ │ └── focal_loss_softmax_kernel.cu │ │ ├── focal_loss_cuda.h │ │ └── focal_loss_cuda.c │ ├── build.py │ └── focal_loss.py ├── _cython_bbox │ ├── build.sh │ ├── setup.py │ ├── cython_bbox.pyx │ └── cython_nms.pyx ├── __init__.py └── build_all.sh ├── img └── pipeline4.png ├── examples └── faster-rcnn │ └── cityscapes │ └── vgg │ ├── eval_single.sh │ ├── 2cluster.sh │ ├── 4cluster.sh │ ├── 8cluster.sh │ ├── eval.sh │ └── config_512.json └── README.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_nms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/mask_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/extensions/_nms/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_focal_loss/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_align/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_align/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /extensions/_roi_pooling/_ext/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /img/pipeline4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinge008/SCDA/HEAD/img/pipeline4.png -------------------------------------------------------------------------------- /extensions/_cython_bbox/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python setup.py build_ext --inplace 3 | -------------------------------------------------------------------------------- /extensions/_nms/src/nms_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh); -------------------------------------------------------------------------------- /extensions/_bbox_helper/src/bbox_helper.h: -------------------------------------------------------------------------------- 1 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output); 2 | 
-------------------------------------------------------------------------------- /extensions/_bbox_helper/src/bbox_helper_cuda.h: -------------------------------------------------------------------------------- 1 | int gpu_iou_overlaps(THCudaTensor * bboxes1, THCudaTensor * bboxes2, THCudaTensor * output); 2 | -------------------------------------------------------------------------------- /extensions/_nms/src/nms.h: -------------------------------------------------------------------------------- 1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh); -------------------------------------------------------------------------------- /extensions/_nms/build.sh: -------------------------------------------------------------------------------- 1 | cd src/cuda 2 | echo "Compiling nms kernels by nvcc..." 3 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 4 | cd ../../ 5 | python build.py 6 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/build.sh: -------------------------------------------------------------------------------- 1 | cd src/cuda 2 | echo "Compiling nms kernels by nvcc..." 
3 | nvcc -c -o iou_overlap_kernel.cu.o iou_overlap_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 4 | cd ../../ 5 | python build.py 6 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/src/roi_pooling.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale, 2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output); -------------------------------------------------------------------------------- /extensions/__init__.py: -------------------------------------------------------------------------------- 1 | from extensions._nms.pth_nms import pth_nms as nms 2 | # from extensions._psroi_pooling.psroi_pool import PSRoIPool 3 | from extensions._roi_pooling.modules.roi_pool import _RoIPooling as RoIPool 4 | # from extensions._deformable_convolution.deformable_conv import * -------------------------------------------------------------------------------- /extensions/_roi_align/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 8 | 9 | cd ../ 10 | python build.py 11 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda/ 4 | 5 | cd src 6 | echo "Compiling my_lib kernels by nvcc..." 
7 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 8 | 9 | cd ../ 10 | python build.py 11 | 12 | -------------------------------------------------------------------------------- /extensions/build_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for file in ./* 3 | do 4 | if test -d $file && test -f $file/build.sh 5 | then 6 | cd $file 7 | echo building $file 8 | bash build.sh 9 | if [ $? != 0 ]; then 10 | exit 11 | fi 12 | cd .. 13 | fi 14 | done 15 | -------------------------------------------------------------------------------- /datasets/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build 10 | clean: 11 | rm _mask.c _mask.cpython-36m-x86_64-linux-gnu.so 12 | -------------------------------------------------------------------------------- /extensions/_focal_loss/build.sh: -------------------------------------------------------------------------------- 1 | cd src/cuda 2 | echo "Compiling focal_loss kernels by nvcc..." 
3 | nvcc -c -o focal_loss_sigmoid_kernel.cu.o focal_loss_sigmoid_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 4 | nvcc -c -o focal_loss_softmax_kernel.cu.o focal_loss_softmax_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50 5 | cd ../../ 6 | python build.py 7 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/src/bbox_helper.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output){ 5 | 6 | float * bboxes1_flat = THFloatTensor_data(bboxes1); 7 | float * bboxes2_flat = THFloatTensor_data(bboxes2); 8 | 9 | // TO BE IMPLEMENTED 10 | } 11 | -------------------------------------------------------------------------------- /extensions/_roi_align/src/roi_align_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 3 | 4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 6 | -------------------------------------------------------------------------------- /extensions/_nms/src/cuda/nms_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _NMS_KERNEL 2 | #define _NMS_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 10 | 11 | void _nms(int boxes_num, float * boxes_dev, 12 | unsigned long long * mask_dev, float nms_overlap_thresh); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | 20 | 
-------------------------------------------------------------------------------- /extensions/_roi_pooling/src/roi_pooling_cuda.h: -------------------------------------------------------------------------------- 1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale, 2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax); 3 | 4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax); -------------------------------------------------------------------------------- /extensions/_bbox_helper/src/cuda/iou_overlap_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _IOU_OVERLAP_KERNEL 2 | #define _IOU_OVERLAP_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int IOUOverlap( 9 | const float* bboxes1_data, const float* bboxes2_data, 10 | const int size_bbox, 11 | const int num_bbox1, 12 | const int num_bbox2, 13 | float* top_data, 14 | cudaStream_t stream); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif 21 | 22 | -------------------------------------------------------------------------------- /extensions/_nms/_ext/nms/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._nms import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /extensions/_roi_align/_ext/roi_align/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/_ext/bbox_helper/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._bbox_helper import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /extensions/_focal_loss/_ext/focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._focal_loss import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/_ext/roi_pooling/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_pooling import lib as _lib, 
ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /models/faster_rcnn/init.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Time : 18-4-19 3 | # @Author : Xinge 4 | import torch.nn.init as init 5 | import numpy as np 6 | 7 | 8 | def gaussian_weights_init(m): 9 | classname = m.__class__.__name__ 10 | if classname.find('Conv') != -1 and classname.find('Conv') == 0: 11 | # print m.__class__.__name__ 12 | m.weight.data.normal_(0.0, 0.02) 13 | 14 | def xavier_weights_init(m): 15 | classname = m.__class__.__name__ 16 | if classname.find('Conv') != -1: 17 | init.xavier_uniform(m.weight, gain=np.sqrt(2)) 18 | init.constant(m.bias, 0.1) 19 | 20 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/bbox_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from extensions._bbox_helper._ext import bbox_helper 3 | import numpy as np 4 | 5 | def overlap(bboxes1, bboxes2): 6 | # bboxes1, bboxes2 must be numpy arrays (they are converted with torch.from_numpy) 7 | # bboxes1 [N, 4]: x1, y1, x2, y2 8 | # bboxes2 [M, 4]: x1, y1, x2, y2 9 | bboxes1 = torch.from_numpy(bboxes1[:, :4]).float().cuda().contiguous() 10 | bboxes2 = torch.from_numpy(bboxes2[:, :4]).float().cuda().contiguous() 11 | 12 | output = torch.cuda.FloatTensor(bboxes1.shape[0], bboxes2.shape[0]) 13 | bbox_helper.gpu_iou_overlaps(bboxes1, bboxes2, output) 14 | 15 | return output.cpu().numpy() 16 | 17 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/modules/roi_pool.py: 
-------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import RoIPoolFunction 3 | 4 | 5 | class _RoIPooling(Module): 6 | def __init__(self, pooled_height, pooled_width, spatial_scale): 7 | super(_RoIPooling, self).__init__() 8 | 9 | self.pooled_width = int(pooled_width) 10 | self.pooled_height = int(pooled_height) 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | assert(rois.shape[1] == 5) 15 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois) 16 | -------------------------------------------------------------------------------- /extensions/_focal_loss/src/cuda/focal_loss_sigmoid_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _FOCAL_LOSS_SIGMOID_KERNEL 2 | #define _FOCAL_LOSS_SIGMOID_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int SigmoidFocalLossForwardLaucher( 9 | const int N, const float* logits, 10 | const int* targets, const float weight_pos, 11 | const float gamma, const float alpha, 12 | const int num_classes, float* losses, cudaStream_t stream); 13 | 14 | int SigmoidFocalLossBackwardLaucher( 15 | const int N, const float* logits, 16 | const int* targets, float* dX_data, const float weight_pos, 17 | const float gamma, const float alpha, const int num_classes, 18 | cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /extensions/_focal_loss/src/cuda/focal_loss_softmax_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _FOCAL_LOSS_SOFTMAX_KERNEL 2 | #define _FOCAL_LOSS_SOFTMAX_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int SoftmaxFocalLossForwardLaucher( 9 | const int N, const float* logits, 
10 | const int* targets, const float weight_pos, 11 | const float gamma, const float alpha, 12 | const int num_classes, float* losses, 13 | float* priors, cudaStream_t stream); 14 | 15 | int SoftmaxFocalLossBackwardLaucher( 16 | const int N, const float* logits, 17 | const int* targets, float* dX_data, const float weight_pos, 18 | const float gamma, const float alpha, const int num_classes, 19 | const float* priors, float* buff, cudaStream_t stream); 20 | 21 | #ifdef __cplusplus 22 | } 23 | #endif 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /datasets/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), 'common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': '.'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) 25 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/src/roi_pooling_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_POOLING_KERNEL 2 | #define _ROI_POOLING_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int ROIPoolForwardLaucher( 9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 10 | const int width, const int channels, const int pooled_height, 11 | 
const int pooled_width, const float* bottom_rois, 12 | float* top_data, int* argmax_data, cudaStream_t stream); 13 | 14 | 15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 16 | const int height, const int width, const int channels, const int pooled_height, 17 | const int pooled_width, const float* bottom_rois, 18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream); 19 | 20 | #ifdef __cplusplus 21 | } 22 | #endif 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /extensions/_nms/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/nms.c'] 7 | headers = ['src/nms.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/nms_cuda.c'] 14 | headers += ['src/nms_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/nms_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.nms', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /extensions/_roi_pooling/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/roi_pooling.c'] 7 | headers = ['src/roi_pooling.h'] 8 | defines = [] 9 | with_cuda = False 
10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/roi_pooling_cuda.c'] 14 | headers += ['src/roi_pooling_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/roi_pooling.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.roi_pooling', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /extensions/_bbox_helper/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = ['src/bbox_helper.c'] 7 | headers = ['src/bbox_helper.h'] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/bbox_helper_cuda.c'] 14 | headers += ['src/bbox_helper_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/iou_overlap_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | 23 | ffi = create_extension( 24 | '_ext.bbox_helper', 25 | headers=headers, 26 | sources=sources, 27 | define_macros=defines, 28 | relative_to=__file__, 29 | with_cuda=with_cuda, 30 | extra_objects=extra_objects 31 | ) 32 | 33 | if __name__ == '__main__': 34 | ffi.build() 35 | -------------------------------------------------------------------------------- /extensions/_roi_align/build.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | # sources = ['src/roi_align.c'] 6 | # headers = ['src/roi_align.h'] 7 | sources = [] 8 | headers = [] 9 | defines = [] 10 | with_cuda = False 11 | 12 | if torch.cuda.is_available(): 13 | print('Including CUDA code.') 14 | sources += ['src/roi_align_cuda.c'] 15 | headers += ['src/roi_align_cuda.h'] 16 | defines += [('WITH_CUDA', None)] 17 | with_cuda = True 18 | 19 | this_file = os.path.dirname(os.path.realpath(__file__)) 20 | print(this_file) 21 | extra_objects = ['src/roi_align_kernel.cu.o'] 22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 23 | 24 | ffi = create_extension( 25 | '_ext.roi_align', 26 | headers=headers, 27 | sources=sources, 28 | define_macros=defines, 29 | relative_to=__file__, 30 | with_cuda=with_cuda, 31 | extra_objects=extra_objects 32 | ) 33 | 34 | if __name__ == '__main__': 35 | ffi.build() 36 | -------------------------------------------------------------------------------- /extensions/_focal_loss/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 | 5 | 6 | sources = [] 7 | headers = [] 8 | defines = [] 9 | with_cuda = False 10 | 11 | if torch.cuda.is_available(): 12 | print('Including CUDA code.') 13 | sources += ['src/focal_loss_cuda.c'] 14 | headers += ['src/focal_loss_cuda.h'] 15 | defines += [('WITH_CUDA', None)] 16 | with_cuda = True 17 | 18 | this_file = os.path.dirname(os.path.realpath(__file__)) 19 | print(this_file) 20 | extra_objects = ['src/cuda/focal_loss_sigmoid_kernel.cu.o', 'src/cuda/focal_loss_softmax_kernel.cu.o'] 21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 22 | print('extra_objects {0}'.format(extra_objects)) 23 | 24 | ffi = create_extension( 25 | '_ext.focal_loss', 26 | 
class NaiveRpnHead(nn.Module):
    '''
    Minimal RPN head: a shared 3x3 conv + ReLU followed by two sibling 1x1
    convs, one for classification scores and one for box regression.
    '''

    def __init__(self, inplanes, num_classes, num_anchors, mid_channels=512):
        '''
        Args:
            inplanes: number of input feature channels
            num_classes: number of classification outputs per anchor
            num_anchors: number of anchors per spatial location
            mid_channels: width of the shared 3x3 conv (default 512, the
                value that used to be hard-coded)
        '''
        super(NaiveRpnHead, self).__init__()
        self.num_anchors, self.num_classes = num_anchors, num_classes
        # Attribute names are kept so existing state_dict keys still load.
        self.conv3x3 = nn.Conv2d(
            inplanes, mid_channels, kernel_size=3, stride=1, padding=1)
        self.relu3x3 = nn.ReLU(inplace=True)
        self.conv_cls = nn.Conv2d(
            mid_channels, num_anchors * num_classes, kernel_size=1, stride=1)
        self.conv_loc = nn.Conv2d(
            mid_channels, num_anchors * 4, kernel_size=1, stride=1)

    def forward(self, x):
        '''
        Args:
            x: [B, inplanes, h, w], input feature
        Return:
            pred_cls: [B, num_anchors*num_classes, h, w]
            pred_loc: [B, num_anchors*4, h, w]
        '''
        x = self.conv3x3(x)
        x = self.relu3x3(x)
        pred_cls = self.conv_cls(x)
        pred_loc = self.conv_loc(x)
        return pred_cls, pred_loc
#!/bin/bash
# Submit the 4-cluster Cityscapes / VGG16 Faster R-CNN training run via Slurm.
#
# Arguments:
#   $1             Slurm partition, passed to `srun -p`.
#   $5             total number of tasks (GPUS); defaults to 8 when absent.
#   GPUS_PER_NODE  environment variable; defaults to 8.
# NOTE(review): GPUS is read from the fifth positional argument and $2-$4 are
# not used anywhere in this script -- confirm that position is intended.
#
# ROOT, config_512.json and the output directory are all relative paths, so
# they resolve against the directory the script is launched from.
# The /path/to/... arguments below are placeholders.
# stdout and stderr of the job are piped through tee into train.log.
ROOT=../../../..
export PYTHONPATH=$ROOT:$PYTHONPATH
#--------------------------
job_name=training_4cluster
ckdir=4cluster
mkdir -p ./${ckdir}/${job_name}
#--------------------------
PARTITION=$1
GPUS=${5:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}

srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --job-name=${job_name} \
python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
  --config=config_512.json \
  --dist=1 \
  --fix_num=0 \
  --L1=1 \
  --cluster_num=4 \
  --threshold=128 \
  --recon_size=256 \
  --port=21603 \
  --arch=vgg16_FasterRCNN \
  --warmup_epochs=1 \
  --lr=0.0000125 \
  --step_epochs=16,22 \
  --batch-size=1 \
  --epochs=25 \
  --dataset=cityscapes \
  --train_meta_file=/path/to/train.txt \
  --target_meta_file=/path/to/foggy_train.txt \
  --val_meta_file=/path/to/foggy_val.txt \
  --datadir=/path/to/leftImg8bit/ \
  --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
  --results_dir=${ckdir}/${job_name}/results_dir \
  --save_dir=${ckdir}/${job_name} \
  2>&1 | tee ${ckdir}/${job_name}/train.log
#!/bin/bash
# Run the Cityscapes / VGG16 Faster R-CNN script with the `-e` flag through
# Slurm, resuming from the checkpoint given by --resume.
#
# Arguments:
#   $1             Slurm partition, passed to `srun -p`.
#   $5             total number of tasks (GPUS); defaults to 8 when absent.
#   GPUS_PER_NODE  environment variable; defaults to 8.
# NOTE(review): GPUS is read from the fifth positional argument and $2-$4 are
# not used anywhere in this script -- confirm that position is intended.
#
# All relative paths resolve against the launch directory.
# The /path/to/... arguments below are placeholders.
# Output goes to ${ckdir}/${job_name}/train.log (the file keeps the "train"
# name even though this script passes -e).
ROOT=../../../..
export PYTHONPATH=$ROOT:$PYTHONPATH
#--------------------------
job_name=Test
ckdir=4cluster
mkdir -p ./${ckdir}/${job_name}
#--------------------------
PARTITION=$1
GPUS=${5:-8}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}

srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
    --ntasks-per-node=${GPUS_PER_NODE} \
    --job-name=${job_name} \
python -u $ROOT/tools/faster_rcnn_train_val.py \
  --config=config_512.json \
  --dist=1 \
  --fix_num=3 \
  --L1=1 \
  -e \
  --cluster_num=4 \
  --threshold=128 \
  --recon_size=256 \
  --port=21603 \
  --arch=vgg16_FasterRCNN \
  --warmup_epochs=1 \
  --lr=0.0000125 \
  --step_epochs=16,22 \
  --batch-size=1 \
  --epochs=25 \
  --dataset=cityscapes \
  --resume=/path/to/checkpoint.pth \
  --train_meta_file=/path/to/train.txt \
  --target_meta_file=/path/to/foggy_train.txt \
  --val_meta_file=/path/to/foggy_val.txt \
  --datadir=/path/to/leftImg8bit/ \
  --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
  --results_dir=${ckdir}/${job_name}/results_dir \
  --save_dir=${ckdir}/${job_name} \
  2>&1 | tee ${ckdir}/${job_name}/train.log
def pth_nms(dets, thresh):
    """Run the compiled GPU NMS extension and return the kept indices.

    Args:
        dets: detections tensor. It is moved to CUDA, made contiguous and
            cast to float before being handed to the extension.
            NOTE(review): the disabled legacy code read columns 0-3 as
            x1, y1, x2, y2 and column 4 as the score, and sorted by score
            before calling NMS. No sorting happens here, so the input is
            presumably expected to be score-sorted already -- confirm with
            the callers.
        thresh: threshold forwarded unchanged to ``nms.gpu_nms``.

    Returns:
        A contiguous CPU LongTensor holding the first ``num_out[0]`` entries
        that the extension wrote into the ``keep`` buffer.
    """
    gpu_dets = dets.cuda().contiguous()

    # Output buffers are CPU tensors; the extension fills them in place.
    keep_buf = torch.LongTensor(gpu_dets.size(0))
    kept_count = torch.LongTensor(1)
    nms.gpu_nms(keep_buf, kept_count, gpu_dets.float(), thresh)

    kept = keep_buf[:kept_count[0]]
    return kept.cpu().contiguous()
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from Cython.Build import cythonize
from setuptools import Extension
from setuptools import setup

import numpy as np

_NP_INCLUDE_DIRS = np.get_include()


# Extension modules: each one is built from the same-named .pyx file next to
# this script, with identical compiler flags and the NumPy headers on the
# include path.
ext_modules = [
    Extension(
        name=module_name,
        sources=[module_name + '.pyx'],
        extra_compile_args=['-Wno-cpp'],
        include_dirs=[_NP_INCLUDE_DIRS]
    )
    for module_name in ('cython_bbox', 'cython_nms')
]

setup(
    name='Detectron',
    ext_modules=cythonize(ext_modules)
)
class RoIAlign(Module):
    """nn.Module wrapper that applies RoIAlignFunction with a fixed output size.

    The constructor arguments are coerced to ``int`` / ``int`` / ``float`` and
    stored; ``forward`` builds a fresh ``RoIAlignFunction`` from them on every
    call and applies it to ``(features, rois)``.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlign, self).__init__()

        self.aligned_width, self.aligned_height = int(aligned_width), int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        align_op = RoIAlignFunction(
            self.aligned_height, self.aligned_width, self.spatial_scale)
        return align_op(features, rois)
def _master_addr_from_nodelist(node_list):
    """Turn a SLURM_NODELIST value into the address of its first node.

    For a bracketed list, everything from the first '-' or ',' after the '['
    is dropped and the '[' itself removed, which leaves the first host name.
    The first 8 characters are then stripped and the remaining '-' become '.'.
    NOTE(review): the 8-character prefix is site specific -- presumably host
    names embed the IP as '<8-char-prefix>a-b-c-d'; confirm for your cluster.
    """
    if '[' in node_list:
        beg = node_list.find('[')
        cut_points = [pos for pos in (node_list.find('-', beg),
                                      node_list.find(',', beg)) if pos >= 0]
        # No separator after '[': keep the whole string.
        end = min(cut_points) if cut_points else len(node_list)
        node_list = node_list[:end].replace('[', '')
    return node_list[8:].replace('-', '.')


def dist_init(port, backend = 'nccl'):
    """Initialise torch.distributed for a process launched by Slurm.

    Reads SLURM_PROCID / SLURM_NTASKS / SLURM_NODELIST, binds the process to
    GPU ``proc_id % device_count``, exports MASTER_PORT / MASTER_ADDR /
    WORLD_SIZE / RANK and calls ``dist.init_process_group``.

    Args:
        port: master port; any value accepted by ``str()`` (int or str).
        backend: 'nccl' (default); any other value selects 'gloo'.

    Returns:
        (rank, world_size) as reported by torch.distributed.

    Raises:
        KeyError: if one of the SLURM_* variables is not set.
    """
    method = mp.get_start_method(allow_none=True)
    if method is None:
        mp.set_start_method('spawn')
        # Log the method actually in effect, not the stale None.
        method = 'spawn'
    logger.info('multiprocessing start method:{}'.format(method))
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)

    addr = _master_addr_from_nodelist(node_list)
    # os.environ only accepts strings; an int port used to raise TypeError.
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    if backend == 'nccl':
        dist.init_process_group(backend='nccl')
    else:
        dist.init_process_group(backend='gloo', rank=proc_id, world_size=ntasks)

    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size
60 - remaining_day * 1440 - remaining_hour * 60) 37 | logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % (i, n, i/n*100, average_time, remaining_day, remaining_hour, remaining_min)) 38 | 39 | 40 | def main(): 41 | for i, lvl in enumerate([logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR, logging.CRITICAL]): 42 | log_name = str(lvl) 43 | init_log(log_name, lvl) 44 | logger = logging.getLogger(log_name) 45 | print('****cur lvl:{}'.format(lvl)) 46 | logger.debug('debug') 47 | logger.info('info') 48 | logger.warning('warning') 49 | logger.error('error') 50 | logger.critical('critiacal') 51 | if __name__ == '__main__': 52 | main() 53 | -------------------------------------------------------------------------------- /utils/lr_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.optim import Optimizer 3 | 4 | class _IterLRScheduler(object): 5 | def __init__(self, optimizer, last_iter=-1): 6 | if not isinstance(optimizer, Optimizer): 7 | raise TypeError('{} is not an Optimizer'.format( 8 | type(optimizer).__name__)) 9 | self.optimizer = optimizer 10 | if last_iter == -1: 11 | for group in optimizer.param_groups: 12 | group.setdefault('initial_lr', group['lr']) 13 | else: 14 | for i, group in enumerate(optimizer.param_groups): 15 | if 'initial_lr' not in group: 16 | raise KeyError("param 'initial_lr' is not specified " 17 | "in param_groups[{}] when resuming an optimizer".format(i)) 18 | self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups)) 19 | self.step(last_iter + 1) 20 | self.last_iter = last_iter 21 | 22 | def get_lr(self): 23 | raise NotImplementedError 24 | 25 | def step(self, iter=None): 26 | if iter is None: 27 | iter = self.last_iter + 1 28 | self.last_iter = iter 29 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()): 30 | param_group['lr'] = lr 31 | 32 | 33 | class 
class RoIPoolFunction(Function):
    """Legacy-style autograd Function wrapping the compiled roi_pooling ops.

    The pooling configuration is stored on the instance in ``__init__``;
    ``forward`` additionally records the feature size, the rois and the
    argmax buffer that ``backward`` passes back to the extension.
    """

    def __init__(ctx, pooled_height, pooled_width, spatial_scale):
        ctx.pooled_height, ctx.pooled_width = pooled_height, pooled_width
        ctx.spatial_scale = spatial_scale
        # Filled in by forward(); backward() asserts that it was set.
        ctx.feature_size = None

    def forward(ctx, features, rois):
        """Pool ``features`` over ``rois``; returns a tensor of shape
        (num_rois, num_channels, pooled_height, pooled_width)."""
        ctx.feature_size = features.size()
        _, num_channels, _, _ = ctx.feature_size
        out_shape = (rois.size(0), num_channels, ctx.pooled_height, ctx.pooled_width)
        output = features.new(*out_shape).zero_()
        ctx.argmax = features.new(*out_shape).zero_().int()
        ctx.rois = rois
        if features.is_cuda:
            assert(features.is_contiguous())
            assert(rois.is_contiguous())
            roi_pooling.roi_pooling_forward_cuda(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                features, rois, output, ctx.argmax)
        else:
            # The CPU op is given the features permuted with (0, 2, 3, 1)
            # and does not receive the argmax buffer.
            permuted = features.permute(0, 2, 3, 1)
            roi_pooling.roi_pooling_forward(
                ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
                permuted, rois, output)

        return output

    def backward(ctx, grad_output):
        """CUDA-only backward; returns (grad wrt features, None for rois)."""
        assert(ctx.feature_size is not None and grad_output.is_cuda)
        grad_input = grad_output.new(*ctx.feature_size).zero_()

        assert(grad_output.is_contiguous())
        assert(ctx.rois.is_contiguous())
        roi_pooling.roi_pooling_backward_cuda(
            ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
            grad_output, ctx.rois, grad_input, ctx.argmax)

        return grad_input, None
class RoIPool(nn.Module):
    """Pure-PyTorch reference implementation of RoI max pooling.

    Each RoI row is ``(batch_index, x1, y1, x2, y2)``. The coordinates are
    multiplied by ``spatial_scale`` and rounded, the resulting window is split
    into ``pooled_height x pooled_width`` bins, and every bin is max-pooled
    per channel. Bins that fall completely outside the feature map stay 0.
    """

    def __init__(self, pooled_height, pooled_width, spatial_scale):
        super(RoIPool, self).__init__()
        self.pooled_width = int(pooled_width)
        self.pooled_height = int(pooled_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """
        Args:
            features: [B, C, H, W] feature map.
            rois: [num_rois, 5] tensor of (batch_index, x1, y1, x2, y2).
        Returns:
            [num_rois, C, pooled_height, pooled_width] tensor with the same
            dtype and device as ``features``.
        """
        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size()[0]
        # Allocate next to the input instead of forcing CUDA, so the module
        # also works for CPU features and never mixes devices.
        outputs = features.new(num_rois, num_channels,
                               self.pooled_height, self.pooled_width).zero_()

        # One host copy for all RoIs instead of one per RoI.
        rois_np = rois.data.cpu().numpy()

        for roi_ind in range(num_rois):
            roi = rois_np[roi_ind]
            batch_ind = int(roi[0])
            roi_start_w, roi_start_h, roi_end_w, roi_end_h = (
                int(v) for v in np.round(roi[1:] * self.spatial_scale).astype(int))
            # Malformed RoIs are forced to be at least 1x1.
            roi_width = max(roi_end_w - roi_start_w + 1, 1)
            roi_height = max(roi_end_h - roi_start_h + 1, 1)
            bin_size_w = float(roi_width) / float(self.pooled_width)
            bin_size_h = float(roi_height) / float(self.pooled_height)
            data = features[batch_ind]

            for ph in range(self.pooled_height):
                hstart = int(np.floor(ph * bin_size_h))
                hend = int(np.ceil((ph + 1) * bin_size_h))
                # Shift into feature-map coordinates and clip to the map.
                hstart = min(data_height, max(0, hstart + roi_start_h))
                hend = min(data_height, max(0, hend + roi_start_h))
                for pw in range(self.pooled_width):
                    wstart = int(np.floor(pw * bin_size_w))
                    wend = int(np.ceil((pw + 1) * bin_size_w))
                    wstart = min(data_width, max(0, wstart + roi_start_w))
                    wend = min(data_width, max(0, wend + roi_start_w))

                    if hend <= hstart or wend <= wstart:
                        # Empty bin: the output is already zero.
                        continue
                    region = data[:, hstart:hend, wstart:wend]
                    # Flatten the spatial dims and reduce once; this does not
                    # depend on the keepdim default of torch.max.
                    outputs[roi_ind, :, ph, pw] = \
                        region.contiguous().view(num_channels, -1).max(1)[0]

        return outputs
*/ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. 
def check_keys(model, pretrained_state_dict):
    ''' Compare checkpoint parameter names with the model's and log the overlap.

    :param model: object whose ``state_dict()`` yields the expected parameter names
    :param pretrained_state_dict: mapping of parameter name -> value from a checkpoint
    :return: True when at least one checkpoint key matches a model key
    :raises AssertionError: when no checkpoint key matches any model key
    '''
    ckpt_keys = set(pretrained_state_dict.keys())
    model_keys = set(model.state_dict().keys())
    used_pretrained_keys = model_keys & ckpt_keys
    unused_pretrained_keys = ckpt_keys - model_keys
    missing_keys = model_keys - ckpt_keys
    # The full key listings are diagnostic detail: route them through the
    # module logger at DEBUG level instead of printing them to stdout on
    # every load. Formatting is skipped entirely when DEBUG is disabled.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('model keys:\n{}'.format(pprint.pformat(model_keys)))
        logger.debug('checkpoint keys:\n{}'.format(pprint.pformat(ckpt_keys)))
    logger.info('missing keys:{}'.format(len(missing_keys)))
    logger.info('unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
    logger.info('used keys:{}'.format(len(used_pretrained_keys)))
    if not used_pretrained_keys:
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError('load NONE from pretrained checkpoint')
    return True
def load_pretrain(model, pretrained_path):
    ''' Load pretrained weights from ``pretrained_path`` into ``model``.

    Paths ending in 'tar' are read as a checkpoint dict whose weights live
    under the 'state_dict' key; any other path is read as a bare state dict.
    A leading 'module.' prefix is removed from parameter names, and loading is
    non-strict, so missing or unexpected keys are tolerated (``check_keys``
    logs the counts).

    :param model: module to receive the weights
    :param pretrained_path: path of the file passed to ``torch.load``
    :return: the same ``model`` object, with weights loaded
    '''
    logger.info('load pretrained model from {}'.format(pretrained_path))
    if torch.cuda.is_available():
        device = torch.cuda.current_device()
        map_location = lambda storage, loc: storage.cuda(device)
    else:
        # CPU-only host: keep the storages where torch.load deserializes them
        # instead of failing inside torch.cuda.current_device().
        map_location = lambda storage, loc: storage
    pretrained_dict = torch.load(pretrained_path, map_location=map_location)
    if pretrained_path.endswith('tar'):
        pretrained_dict = pretrained_dict['state_dict']
    pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model
| 12 | #include "cuda/nms_kernel.h" 13 | 14 | 15 | extern THCState *state; 16 | 17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) { 18 | // boxes has to be sorted 19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous"); 20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous"); 21 | // Number of ROIs 22 | int boxes_num = THCudaTensor_size(state, boxes, 0); 23 | int boxes_dim = THCudaTensor_size(state, boxes, 1); 24 | 25 | float* boxes_flat = THCudaTensor_data(state, boxes); 26 | 27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks); 29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask); 30 | 31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh); 32 | 33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks); 34 | THLongTensor_copyCuda(state, mask_cpu, mask); 35 | THCudaLongTensor_free(state, mask); 36 | 37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu); 38 | 39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks); 40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu); 41 | THLongTensor_fill(remv_cpu, 0); 42 | 43 | long * keep_flat = THLongTensor_data(keep); 44 | long num_to_keep = 0; 45 | 46 | int i, j; 47 | for (i = 0; i < boxes_num; i++) { 48 | int nblock = i / threadsPerBlock; 49 | int inblock = i % threadsPerBlock; 50 | 51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) { 52 | keep_flat[num_to_keep++] = i; 53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks; 54 | for (j = nblock; j < col_blocks; j++) { 55 | remv_cpu_flat[j] |= p[j]; 56 | } 57 | } 58 | } 59 | 60 | long * num_out_flat = THLongTensor_data(num_out); 61 | * num_out_flat = num_to_keep; 62 | 63 | THLongTensor_free(mask_cpu); 64 | THLongTensor_free(remv_cpu); 65 | 66 | return 1; 
def pil_loader(img_str):
    ''' Decode an in-memory encoded image into an RGB PIL image.

    :param img_str: the encoded image file contents; must be a bytes-like
        object, because the image decoder reads from a binary stream
    :return: the decoded image converted to mode 'RGB'
    '''
    # io.StringIO is a text buffer and rejects bytes on Python 3; the decoder
    # needs a binary file object, so wrap the payload in BytesIO instead.
    # Imported locally so the fix does not depend on the file's import block.
    from io import BytesIO
    with Image.open(BytesIO(img_str)) as img:
        return img.convert('RGB')
def eval_coco_ap_from_results_txt(result_dir, test_type, anno_file):
    ''' Evaluate detection results stored as text files with the COCO API.

    Every file in ``result_dir`` whose name contains 'results.txt.rank' is
    parsed. Each non-empty line is whitespace separated:
    ``<image path> x1 y1 x2 y2 ... `` followed by ``score`` (for
    ``test_type == 'proposal'``) or by ``score class_index`` (otherwise).
    The image id is the integer file stem of the image path. Class indices are
    1-based positions in the sorted list of category ids found in the
    annotation file.

    :param result_dir: directory holding the per-rank result files
    :param test_type: one of 'segm', 'bbox', 'keypoints', 'person_bbox',
        'person_proposal', 'proposal'
    :param anno_file: COCO annotation file used as ground truth
    :return: None; the summary is produced by ``COCOeval.summarize()``
    '''
    logger.info("start eval coco ...")

    assert(test_type in ['segm', 'bbox', 'keypoints', 'person_bbox', 'person_proposal', 'proposal'])

    coco_gt = COCO(anno_file)
    category_ids = set()
    for anno in coco_gt.anns.values():
        category_ids.add(anno['category_id'])
    class_to_category = {i+1:c for i, c in enumerate(sorted(category_ids))}

    all_res = []
    for f in os.listdir(result_dir):
        if 'results.txt.rank' not in f:
            continue
        # Context manager so every result file is closed once it is parsed.
        with open(os.path.join(result_dir, f), 'r') as result_file:
            for aline in result_file:
                aline = aline.rstrip().split()
                if not aline:
                    # tolerate blank lines instead of failing on aline[0]
                    continue
                res = {}
                # '.../<image_id>.<ext>' -> the piece right before the extension
                res["image_id"] = int(re.split(r'[/.]', aline[0])[-2])
                x1 = float(aline[1])
                y1 = float(aline[2])
                x2 = float(aline[3])
                y2 = float(aline[4])
                if test_type == 'proposal':
                    res["bbox"] = [x1, y1, x2, y2]
                    res["score"]= float(aline[-1])
                    res["category_id"] = 1
                else:
                    # corner format -> [x, y, width, height]
                    res["bbox"] = [x1, y1, x2 - x1, y2 - y1]
                    res["score"]= float(aline[-2])
                    res["category_id"] = class_to_category[int(aline[-1])]
                all_res.append(res)

    logger.info("all res line: {}".format(len(all_res)))

    iou_type = {'keypoints':'keypoints', 'person_bbox':'bbox',
                'bbox':'bbox', 'segm':'segm',
                'proposal': 'bbox', 'person_proposal':'bbox'}[test_type]

    logger.info('loading annotations from %s\n' % anno_file)
    coco_dt = coco_gt.loadRes(all_res)
    coco_eval = COCOeval(coco_gt, coco_dt, iou_type)

    if test_type.find('proposal') >= 0:
        # proposals are scored class-agnostically with larger detection budgets
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = [1,100,1000]
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
THCState_getCurrentStream(state); 32 | 33 | ROIAlignForwardLaucher( 34 | data_flat, spatial_scale, num_rois, data_height, 35 | data_width, num_channels, aligned_height, 36 | aligned_width, rois_flat, 37 | output_flat, stream); 38 | 39 | return 1; 40 | } 41 | 42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 44 | { 45 | // Grab the input tensor 46 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 47 | float * rois_flat = THCudaTensor_data(state, rois); 48 | 49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 50 | 51 | // Number of ROIs 52 | int num_rois = THCudaTensor_size(state, rois, 0); 53 | int size_rois = THCudaTensor_size(state, rois, 1); 54 | if (size_rois != 5) 55 | { 56 | return 0; 57 | } 58 | 59 | // batch size 60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 61 | // data height 62 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 63 | // data width 64 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 65 | // Number of channels 66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 67 | 68 | cudaStream_t stream = THCState_getCurrentStream(state); 69 | ROIAlignBackwardLaucher( 70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 71 | data_width, num_channels, aligned_height, 72 | aligned_width, rois_flat, 73 | bottom_grad_flat, stream); 74 | 75 | return 1; 76 | } 77 | -------------------------------------------------------------------------------- /datasets/example_loader.py: -------------------------------------------------------------------------------- 1 | #encoding: utf-8 2 | 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | import logging 7 | 8 | class ExampleDataLoader(torch.utils.data.DataLoader): 9 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, 10 | 
num_workers=0, pin_memory=False, drop_last=False): 11 | super(ExampleDataLoader, self).__init__(dataset, batch_size, shuffle, sampler, batch_sampler, 12 | num_workers, self._collate_fn, pin_memory, drop_last) 13 | def _collate_fn(self, batch): 14 | batch_size = len(batch) 15 | 16 | zip_batch = list(zip(*batch)) 17 | images = zip_batch[0] 18 | unpad_image_sizes = zip_batch[1] 19 | ground_truth_bboxes = zip_batch[2] 20 | ignores = zip_batch[3] 21 | filenames = zip_batch[4] 22 | 23 | max_img_h = max([_.shape[-2] for _ in images]) 24 | max_img_w = max([_.shape[-1] for _ in images]) 25 | max_num_gt_bboxes = max([_.shape[0] for _ in ground_truth_bboxes]) 26 | max_num_ig_bboxes = max([_.shape[0] for _ in ignores]) 27 | 28 | 29 | padded_images = [] 30 | padded_gt_bboxes = [] 31 | padded_ig_bboxes = [] 32 | for b_ix in range(batch_size): 33 | img = images[b_ix] 34 | # pad zeros to right bottom of each image 35 | pad_size = (0, max_img_w - img.shape[-1], 0, max_img_h - img.shape[-2]) 36 | padded_images.append(F.pad(img, pad_size, 'constant', 0).data.cpu()) 37 | 38 | # pad zeros to gt_bboxes 39 | gt_bboxes = ground_truth_bboxes[b_ix].numpy() 40 | new_gt_bboxes = np.zeros([max_num_gt_bboxes, gt_bboxes.shape[-1]]) 41 | new_gt_bboxes[range(gt_bboxes.shape[0]), :] = gt_bboxes 42 | padded_gt_bboxes.append(new_gt_bboxes) 43 | 44 | # pad zeros to ig_bboxes 45 | ig_bboxes = ignores[b_ix].numpy() 46 | new_ig_bboxes = np.zeros([max_num_ig_bboxes, ig_bboxes.shape[-1]]) 47 | new_ig_bboxes[range(ig_bboxes.shape[0]), :] = ig_bboxes 48 | padded_ig_bboxes.append(new_ig_bboxes) 49 | 50 | padded_images = images = torch.cat(padded_images, dim = 0) 51 | padded_gt_bboxes = torch.from_numpy(np.stack(padded_gt_bboxes, axis = 0)) 52 | padded_ig_bboxes = torch.from_numpy(np.stack(padded_ig_bboxes, axis = 0)) 53 | unpad_image_sizes = torch.stack(unpad_image_sizes, dim = 0) 54 | #logger = logging.getLogger('global') 55 | #logger.debug('{0},{1},{2}'.format(padded_images.shape, padded_gt_bboxes.shape, 
@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise intersection-over-union between two sets of boxes.

    Columns 0..3 of each row are read as x1, y1, x2, y2. Widths and heights
    are plain coordinate differences (no +1 pixel convention). Pairs whose
    intersection width or height is not positive keep the initial value 0.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Result starts at zero, so non-overlapping pairs need no explicit write.
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    # The loops only touch typed buffers and C scalars, so they run without the GIL.
    with nogil:
        for k in range(K):
            # Area of query box k, computed once per column of the result.
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0]) *
                (query_boxes[k, 3] - query_boxes[k, 1])
            )
            for n in range(N):
                # Intersection width; non-positive means no horizontal overlap.
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0])
                )
                if iw > 0:
                    # Intersection height, only needed when the widths overlap.
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1])
                    )
                    if ih > 0:
                        # Union area = area(box n) + area(query k) - intersection.
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0]) *
                            (boxes[n, 3] - boxes[n, 1]) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps
def compute_predicted_bboxes(rois, pred_cls, pred_loc, image_info, cfg):
    '''
    Decode per-class box regressions, then apply score thresholding, per-image
    per-class NMS and an optional per-image top-n cut.

    :param cfg: config; reads 'bbox_normalize_stats_precomputed',
        'bbox_normalize_stds', 'bbox_normalize_means', 'score_thresh',
        'nms_iou_thresh' and 'top_n'
    :param rois: [N, k] k>=5, batch_ix, x1, y1, x2, y2
    :param pred_cls:[N, num_classes, 1, 1]
    :param pred_loc:[N, num_classes * 4, 1, 1]
    :param image_info:[N, 3]
    :return: bboxes: [M, 7], batch_ix, x1, y1, x2, y2, score, cls
        (a float CUDA tensor; M is 0 when there is no input roi or nothing
        survives the score threshold)
    '''
    rois, pred_cls, pred_loc = map(to_np_array, [rois, pred_cls, pred_loc])
    N, num_classes = pred_cls.shape[0:2]
    assert(N == rois.shape[0])
    # Layout of one output row: batch_ix, x1, y1, x2, y2, score, cls.
    empty_bboxes = np.zeros((0, 7), dtype=np.float32)
    if N == 0:
        # max() below has nothing to reduce over when there are no rois
        return torch.from_numpy(empty_bboxes).float().cuda()
    B = max(rois[:, 0].astype(np.int32))+1
    nmsed_bboxes = []
    # class 0 is skipped (presumably background -- confirm against the head)
    for cls in range(1, num_classes):
        # reshape rather than squeeze(): squeeze would also drop the N axis
        # when N == 1 and break the [:, np.newaxis] indexing below.
        scores = pred_cls[:, cls].reshape(N)
        deltas = pred_loc[:, cls*4:cls*4+4].reshape(N, 4)
        if cfg['bbox_normalize_stats_precomputed']:
            # undo the target normalization applied to the regression deltas
            deltas = deltas * np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]\
                + np.array(cfg['bbox_normalize_means'])[np.newaxis, :]
        bboxes = bbox_helper.compute_loc_bboxes(rois[:,1:1+4], deltas)
        bboxes = np.hstack([bboxes, scores[:, np.newaxis]])
        # for each image, do nms
        for b_ix in range(B):
            rois_ix = np.where(rois[:, 0] == b_ix)[0]
            pre_scores = scores[rois_ix]
            pre_bboxes = bboxes[rois_ix]
            pre_bboxes[:, :4] = bbox_helper.clip_bbox(pre_bboxes[:,:4], image_info[b_ix])
            if cfg['score_thresh'] > 0:
                keep_ix = np.where(pre_scores > cfg['score_thresh'])[0]
                pre_scores = pre_scores[keep_ix]
                pre_bboxes = pre_bboxes[keep_ix]
            if pre_scores.size == 0:
                continue
            # nms expects boxes sorted by descending score
            order = pre_scores.argsort()[::-1]
            pre_bboxes = pre_bboxes[order, :]
            keep_index = nms(torch.from_numpy(pre_bboxes).float().cuda(), cfg['nms_iou_thresh']).numpy()
            post_bboxes = pre_bboxes[keep_index]
            batch_ix = np.full(post_bboxes.shape[0], b_ix)
            batch_cls = np.full(post_bboxes.shape[0], cls)
            post_bboxes = np.hstack([batch_ix[:, np.newaxis], post_bboxes, batch_cls[:, np.newaxis]])
            nmsed_bboxes.append(post_bboxes)
    # np.vstack raises on an empty list, which happens whenever no detection
    # survives the threshold; fall back to an empty [0, 7] result instead.
    nmsed_bboxes = np.vstack(nmsed_bboxes) if nmsed_bboxes else empty_bboxes
    if cfg['top_n'] > 0:
        top_n_bboxes = []
        for b_ix in range(B):
            bboxes = nmsed_bboxes[nmsed_bboxes[:, 0] == b_ix]
            scores = bboxes[:, -2]
            order = scores.argsort()[::-1][:cfg['top_n']]
            bboxes = bboxes[order]
            top_n_bboxes.append(bboxes)
        nmsed_bboxes = np.vstack(top_n_bboxes)
    nmsed_bboxes = (torch.from_numpy(nmsed_bboxes)).float().cuda()
    return nmsed_bboxes
pooled_width, rois_flat, 44 | output_flat, argmax_flat, stream); 45 | 46 | return 1; 47 | } 48 | 49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale, 50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax) 51 | { 52 | // Grab the input tensor 53 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 54 | float * rois_flat = THCudaTensor_data(state, rois); 55 | 56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 57 | int * argmax_flat = THCudaIntTensor_data(state, argmax); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | // if (batch_size != 1) 70 | // { 71 | // return 0; 72 | // } 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIPoolBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, pooled_height, 84 | pooled_width, rois_flat, 85 | bottom_grad_flat, argmax_flat, stream); 86 | 87 | return 1; 88 | } 89 | -------------------------------------------------------------------------------- /extensions/_nms/src/cuda/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // 
// Intersection-over-union of two boxes whose first four floats are read as
// x1, y1, x2, y2. Extents use the inclusive "+1" pixel convention, and a
// non-overlapping pair yields an intersection of 0.
__device__ inline float devIoU(float const * const a, float const * const b) {
  // Corners of the overlap rectangle.
  const float ix1 = fmaxf(a[0], b[0]);
  const float iy1 = fmaxf(a[1], b[1]);
  const float ix2 = fminf(a[2], b[2]);
  const float iy2 = fminf(a[3], b[3]);

  // Clamp to zero so disjoint boxes contribute no area.
  const float iw = fmaxf(ix2 - ix1 + 1, 0.f);
  const float ih = fmaxf(iy2 - iy1 + 1, 0.f);
  const float inter = iw * ih;

  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);

  // union = area_a + area_b - intersection
  return inter / (area_a + area_b - inter);
}
def compute_rpn_proposals(conv_cls, conv_loc, cfg, image_info):
    '''
    Turn RPN head outputs into per-image proposals: pick the top-scoring
    anchors, decode and clip their boxes, drop boxes smaller than
    cfg['roi_min_size'], run NMS and keep at most cfg['post_nms_top_n'].

    :argument
        cfg: configs; reads 'anchor_ratios', 'anchor_scales', 'anchor_stride',
             'pre_nms_top_n', 'roi_min_size', 'nms_iou_thresh', 'post_nms_top_n'
        conv_cls: FloatTensor, [batch, num_anchors * x, h, w], conv output of classification
        conv_loc: FloatTensor, [batch, num_anchors * 4, h, w], conv output of localization
        image_info: FloatTensor, [batch, 3], image size
    :returns
        proposals: CPU FloatTensor, 2-dim, one row per kept proposal:
        batch_ix, box coordinates, score
        (i.e. [N, 6] assuming bbox_helper returns 4-column x1, y1, x2, y2 boxes)
    '''

    batch_size, num_anchors_4, featmap_h, featmap_w = conv_loc.shape
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(featmap_h, featmap_w,
        cfg['anchor_ratios'], cfg['anchor_scales'], cfg['anchor_stride'])
    B = batch_size
    A = num_anchors_4 // 4
    assert(A * 4 == num_anchors_4)
    K = featmap_h * featmap_w

    # channel-last so that each row of the flattened view is one anchor
    cls_view = conv_cls.permute(0, 2, 3, 1).contiguous().view(B, K*A, -1).cpu().numpy()
    loc_view = conv_loc.permute(0, 2, 3, 1).contiguous().view(B, K*A, 4).cpu().numpy()
    if torch.is_tensor(image_info):
        image_info = image_info.cpu().numpy()

    batch_proposals = []
    pre_nms_top_n = cfg['pre_nms_top_n']
    for b_ix in range(B):
        scores = cls_view[b_ix, :, -1]  # to compatible with sigmoid
        # '>=' rather than '>': np.argpartition requires kth < len(scores),
        # so asking for exactly as many anchors as exist must take the
        # full-sort path instead of raising ValueError.
        if pre_nms_top_n <= 0 or pre_nms_top_n >= scores.shape[0]:
            order = scores.argsort()[::-1]
        else:
            # partial selection of the best pre_nms_top_n, then sort only those
            inds = np.argpartition(-scores, pre_nms_top_n)[:pre_nms_top_n]
            order = np.argsort(-scores[inds])
            order = inds[order]
        loc_delta = loc_view[b_ix, order, :]
        loc_anchors = anchors_overplane[order, :]
        scores = scores[order]
        boxes = bbox_helper.compute_loc_bboxes(loc_anchors, loc_delta)
        boxes = bbox_helper.clip_bbox(boxes, image_info[b_ix])
        proposals = np.hstack([boxes, scores[:, np.newaxis]])
        # discard boxes narrower or shorter than roi_min_size (inclusive +1 extents)
        proposals = proposals[(proposals[:, 2] - proposals[:, 0] + 1 >= cfg['roi_min_size'])
                            & (proposals[:, 3] - proposals[:, 1] + 1 >= cfg['roi_min_size'])]
        keep_index = nms(torch.from_numpy(proposals).float().cuda(), cfg['nms_iou_thresh']).numpy()
        if cfg['post_nms_top_n'] > 0:
            keep_index = keep_index[:cfg['post_nms_top_n']]
        proposals = proposals[keep_index]
        batch_ix = np.full(keep_index.shape, b_ix)
        proposals = np.hstack([batch_ix[:, np.newaxis], proposals])
        batch_proposals.append(proposals)
    batch_proposals = (torch.from_numpy(np.vstack(batch_proposals))).float()
    if batch_proposals.dim() < 2:
        # unsqueeze is not in-place; its result has to be kept
        batch_proposals = batch_proposals.unsqueeze(dim=0)
    return batch_proposals
def get_anchors_over_grid(ratios, scales, stride):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, stride-1, stride-1) window.

    NOTE(review): ``ratios`` is accepted for interface compatibility but is
    not forwarded; ``generate_anchors`` is called with its default aspect
    ratios (0.5, 1, 2). Confirm against the configs before wiring it in,
    since changing the anchor set affects trained weights.
    """
    sizes = np.array(scales) * stride
    return generate_anchors(stride=stride, sizes=sizes)


def get_anchors_over_plane(featmap_h, featmap_w, anchor_ratios, anchor_scales, anchor_stride):
    """Replicate the per-cell anchors over every cell of a feature map.

    Returns an array of shape [featmap_h * featmap_w * A, 4] where A is the
    number of anchors per cell; rows are ordered cell by cell (row-major over
    the feature map), with the A anchors of one cell stored contiguously.
    """
    # anchors of a single grid cell, [A, 4]
    anchors_overgrid = get_anchors_over_grid(anchor_ratios, anchor_scales, anchor_stride)
    # pixel offsets of every grid cell
    shift_x = np.arange(0, featmap_w) * anchor_stride
    shift_y = np.arange(0, featmap_h) * anchor_stride
    # both [featmap_h, featmap_w]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = anchors_overgrid.shape[0]
    K = shifts.shape[0]
    # broadcast [1, A, 4] + [K, 1, 4] -> [K, A, 4]
    anchors_overplane = (anchors_overgrid.reshape((1, A, 4)) +
                         shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    return anchors_overplane.reshape((K * A, 4))


def generate_anchors(
    stride=16, sizes=(32, 64), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    return _mkanchors(ws, hs, x_ctr, y_ctr)


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors
// One node of a singly linked list of values; `next` links to the following
// node and `key` carries an associated name (presumably the member name for
// object entries -- confirm against the parser implementation).
struct JsonNode {
    JsonValue value;
    JsonNode *next;
    char *key;
};

// Minimal forward iterator over a JsonNode list. It provides exactly the
// operations a range-based for loop needs: increment, inequality, dereference.
struct JsonIterator {
    JsonNode *p;

    // Advance to the next node in the list.
    void operator++() {
        p = p->next;
    }
    // Two iterators differ when they point at different nodes.
    bool operator!=(const JsonIterator &x) const {
        return p != x.p;
    }
    // Dereferencing yields the node pointer itself, not a reference.
    JsonNode *operator*() const {
        return p;
    }
    JsonNode *operator->() const {
        return p;
    }
};

// Iterator to the first node of `o`. toNode() asserts that `o` is tagged
// JSON_ARRAY or JSON_OBJECT.
inline JsonIterator begin(JsonValue o) {
    return JsonIterator{o.toNode()};
}
// Past-the-end iterator: a null node pointer terminates every list.
inline JsonIterator end(JsonValue) {
    return JsonIterator{nullptr};
}
def to_np_array(x):
    """Return ``x`` as a NumPy array (``None`` stays ``None``).

    Tensors are moved to the CPU first; any other value goes through
    ``np.array``.
    """
    if x is None:
        return None
    return x.cpu().data.numpy() if torch.is_tensor(x) else np.array(x)


def get_rois_target_levels(levels, base_scale, base_level, rois):
    '''assign proposals to different level feature map to roi pooling
    Args:
        rois: [R, 5], batch_ix,x1,y1,x2,y2
        levels: [L], levels. e.g.[2,3,4,5,6]
        base_scale: sqrt-area that maps onto ``base_level``
        base_level: level assigned to a roi of size ``base_scale``
    Returns:
        int32 array [R] of levels, clipped to [min(levels), max(levels)]
    '''
    rois = to_np_array(rois)
    w = rois[:, 3] - rois[:, 1] + 1
    h = rois[:, 4] - rois[:, 2] + 1
    scale = (w * h) ** 0.5
    # eps keeps log2 finite for degenerate (zero-area) rois
    eps = 1e-6
    target_levels = np.floor(base_level + np.log2(scale / base_scale + eps)).astype(np.int32)
    min_level, max_level = min(levels), max(levels)
    return np.clip(target_levels, min_level, max_level)


def get_rois_by_level(levels, base_scale, base_level, rois):
    """Split ``rois`` by target level.

    Returns two lists with one entry per element of ``levels``: the rois
    assigned to that level and their row indices in the input.
    """
    rois = to_np_array(rois)
    target_lvls = get_rois_target_levels(levels, base_scale, base_level, rois)
    rois_by_level, rois_ix_by_level = [], []
    for lvl in levels:
        ix = np.where(target_lvls == lvl)[0]
        rois_by_level.append(rois[ix])
        rois_ix_by_level.append(ix)
    return rois_by_level, rois_ix_by_level


def assign_args_by_level(levels, base_scale, base_level, rois, *args):
    '''
    Args:
        rois: [R, 5], batch_ix,x1,y1,x2,y2
        levels: [L], levels. e.g.[2,3,4,5,6]
        *args: arrays/tensors with R rows, split with the same indices
    return:
        args by level: [rois_by_level, arg0_by_level, arg1_by_level, ...]
    '''
    rois = to_np_array(rois)
    rois_by_level, rois_ix_by_level = \
        get_rois_by_level(levels, base_scale, base_level, rois)

    args_by_level = [rois_by_level]
    for arg in args:
        # assign arg to each level
        arg = to_np_array(arg)
        args_by_level.append([arg[ix] for ix in rois_ix_by_level])
    return args_by_level


def get_proposals_assign(proposals, base_scale=224, layer_index=4):
    '''
    :argument
        proposals:[N, k], k>=5, batch_idx, x1, y1, x2, y2
        base_scale: base scale
        layer_index: the layer RoI with wxh=224x224 should be mapped into
    returns:
        p2, p3, p4, p5: the rows of proposals assigned to each level
        (levels below 2 go to p2, levels above 5 go to p5)
    '''
    p = to_np_array(proposals)
    w = p[:, 3] - p[:, 1] + 1
    h = p[:, 4] - p[:, 2] + 1
    area = (w * h) ** 0.5
    k = np.floor(layer_index + np.log2(area / base_scale))
    p2 = p[k <= 2]
    p3 = p[k == 3]
    p4 = p[k == 4]
    p5 = p[k >= 5]
    return p2, p3, p4, p5


def get_rois_assign(rois, cls_targets, loc_targets, loc_weights, base_scale=224, layer_index=4):
    """Group rois and their training targets by pyramid level (2..5).

    Returns ``(roi_new, cls_ts, loc_ts, loc_ws)``: ``roi_new`` is a list of
    four roi arrays (one per level, possibly empty); the other three are CUDA
    tensors holding the targets of all non-empty levels concatenated in level
    order (long, float and float respectively).

    NOTE(review): inputs are indexed with NumPy boolean masks and passed to
    ``torch.from_numpy``, so they are presumably NumPy arrays -- confirm
    against the caller.
    """
    w = rois[:, 3] - rois[:, 1] + 1
    h = rois[:, 4] - rois[:, 2] + 1
    area = (w * h) ** 0.5
    k = np.floor(layer_index + np.log2(area / base_scale))
    level_masks = [k <= 2, k == 3, k == 4, k >= 5]

    roi_new = []
    cls_t_new = []
    loc_t_new = []
    loc_w_new = []
    for mask in level_masks:
        roi_new.append(rois[mask])
        # targets are collected only for levels that received rois
        if np.any(mask):
            cls_t_new.append(cls_targets[mask])
            loc_t_new.append(loc_targets[mask])
            loc_w_new.append(loc_weights[mask])

    # The former unused `cuda_device = rois.device` local was removed: its
    # value was never read and the lookup could raise for non-tensor inputs.
    def to_cuda(arr):
        return torch.from_numpy(arr).cuda()

    cls_ts = to_cuda(np.concatenate(cls_t_new)).long()
    loc_ts = to_cuda(np.vstack(loc_t_new)).float()
    loc_ws = to_cuda(np.vstack(loc_w_new)).float()
    return roi_new, cls_ts, loc_ts, loc_ws
Grab the input tensor 20 | float * logits_flat = THCudaTensor_data(state, logits); 21 | int * targets_flat = THCudaIntTensor_data(state, targets); 22 | 23 | float * losses_flat = THCudaTensor_data(state, losses); 24 | 25 | cudaStream_t stream = THCState_getCurrentStream(state); 26 | 27 | SigmoidFocalLossForwardLaucher( 28 | N, logits_flat, targets_flat, weight_pos, 29 | gamma, alpha, num_classes, losses_flat, stream); 30 | 31 | return 1; 32 | } 33 | 34 | int focal_loss_sigmoid_backward_cuda( 35 | int N, 36 | THCudaTensor * logits, 37 | THCudaIntTensor * targets, 38 | THCudaTensor * dX_data, 39 | float weight_pos, 40 | float gamma, 41 | float alpha, 42 | int num_classes){ 43 | // Grab the input tensor 44 | float * logits_flat = THCudaTensor_data(state, logits); 45 | int * targets_flat = THCudaIntTensor_data(state, targets); 46 | 47 | float * dX_data_flat = THCudaTensor_data(state, dX_data); 48 | 49 | cudaStream_t stream = THCState_getCurrentStream(state); 50 | SigmoidFocalLossBackwardLaucher( 51 | N, logits_flat, targets_flat, dX_data_flat, 52 | weight_pos, gamma, alpha, num_classes, stream); 53 | 54 | return 1; 55 | } 56 | 57 | int focal_loss_softmax_forward_cuda( 58 | int N, 59 | THCudaTensor * logits, 60 | THCudaIntTensor * targets, 61 | float weight_pos, 62 | float gamma, 63 | float alpha, 64 | int num_classes, 65 | THCudaTensor * losses, 66 | THCudaTensor * priors){ 67 | // Grab the input tensor 68 | float * logits_flat = THCudaTensor_data(state, logits); 69 | int * targets_flat = THCudaIntTensor_data(state, targets); 70 | 71 | float * losses_flat = THCudaTensor_data(state, losses); 72 | float * priors_flat = THCudaTensor_data(state, priors); 73 | 74 | cudaStream_t stream = THCState_getCurrentStream(state); 75 | 76 | SoftmaxFocalLossForwardLaucher( 77 | N, logits_flat, targets_flat, weight_pos, 78 | gamma, alpha, num_classes, losses_flat, priors_flat, stream); 79 | 80 | return 1; 81 | } 82 | 83 | int focal_loss_softmax_backward_cuda( 84 | int N, 85 | 
/*
 * CPU ROI max pooling.
 *
 * features is indexed as [batch, height, width, channels] (sizes are read
 * from dimensions 0..3 in that order); rois holds one row per ROI whose
 * first five entries are [batch_index, x1, y1, x2, y2]; output is written as
 * [roi, channel, pooled_height, pooled_width].
 *
 * Returns 0 without touching output when the batch size is not 1,
 * otherwise 1.
 */
int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
                        THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
{
    // Grab the input tensor
    float * data_flat = THFloatTensor_data(features);
    float * rois_flat = THFloatTensor_data(rois);

    float * output_flat = THFloatTensor_data(output);

    // Number of ROIs
    int num_rois = THFloatTensor_size(rois, 0);
    int size_rois = THFloatTensor_size(rois, 1);
    // batch size
    int batch_size = THFloatTensor_size(features, 0);
    if(batch_size != 1)
    {
        return 0;
    }
    // data height
    int data_height = THFloatTensor_size(features, 1);
    // data width
    int data_width = THFloatTensor_size(features, 2);
    // Number of channels
    int num_channels = THFloatTensor_size(features, 3);

    // Initialise every output element to -1 as the starting value of the max.
    // NOTE(review): the original comment said "-inf", but the fill value is
    // -1, so a bin whose true maximum is below -1 is reported as -1.
    // Confirm whether inputs are guaranteed to be >= -1 (e.g. post-ReLU).
    THFloatStorage_fill(THFloatTensor_storage(output), -1);

    // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
    int index_roi = 0;
    int index_output = 0;
    int n;
    for (n = 0; n < num_rois; ++n)
    {
        // ROI corners scaled from image coordinates to feature-map cells
        int roi_batch_ind = rois_flat[index_roi + 0];
        int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
        int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
        int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
        int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
        // CHECK_GE(roi_batch_ind, 0);
        // CHECK_LT(roi_batch_ind, batch_size);

        // Force malformed ROIs to be at least 1x1
        int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
        int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
        float bin_size_h = (float)(roi_height) / (float)(pooled_height);
        float bin_size_w = (float)(roi_width) / (float)(pooled_width);

        // Offset of this ROI's image inside the flat feature buffer
        int index_data = roi_batch_ind * data_height * data_width * num_channels;
        const int output_area = pooled_width * pooled_height;

        int c, ph, pw;
        for (ph = 0; ph < pooled_height; ++ph)
        {
            for (pw = 0; pw < pooled_width; ++pw)
            {
                // Bin extent relative to the ROI origin
                int hstart = (floor((float)(ph) * bin_size_h));
                int wstart = (floor((float)(pw) * bin_size_w));
                int hend = (ceil((float)(ph + 1) * bin_size_h));
                int wend = (ceil((float)(pw + 1) * bin_size_w));

                // Shift to feature-map coordinates and clamp to its bounds
                hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
                hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
                wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
                wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);

                const int pool_index = index_output + (ph * pooled_width + pw);
                int is_empty = (hend <= hstart) || (wend <= wstart);
                if (is_empty)
                {
                    // Empty bins output 0 for every channel
                    for (c = 0; c < num_channels * output_area; c += output_area)
                    {
                        output_flat[pool_index + c] = 0;
                    }
                }
                else
                {
                    // These declarations shadow the outer `c` inside this branch
                    int h, w, c;
                    for (h = hstart; h < hend; ++h)
                    {
                        for (w = wstart; w < wend; ++w)
                        {
                            for (c = 0; c < num_channels; ++c)
                            {
                                // channels-last input, channels-first output
                                const int index = (h * data_width + w) * num_channels + c;
                                if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
                                {
                                    output_flat[pool_index + c * output_area] = data_flat[index_data + index];
                                }
                            }
                        }
                    }
                }
            }
        }

        // Increment ROI index
        index_roi += size_rois;
        index_output += pooled_height * pooled_width * num_channels;
    }
    return 1;
}
If you want to train with your own datasets, please customize these meta-files to match your setup. 31 | 32 | ## Training 33 | We provide several training scripts for our three types of models. Following MMDetection, we use Slurm for distributed training (details can be found [here](https://github.com/open-mmlab/mmdetection/blob/master/tools/slurm_train.sh)). 34 | 35 | 1. You need to modify the hyper-parameters in these bash scripts (./example/faster-rcnn/cityscapes/vgg/), for example train_meta_file, target_meta_file and val_meta_file. 36 | You also need to download the ImageNet-pretrained models, such as vgg16. 37 | 2. The hyper-parameters of detection are placed in config_512.json, and you can customize them. 38 | 3. Training: 39 | ```bash 40 | cd ./example/faster-rcnn/cityscapes/vgg/ 41 | sh 4cluster.sh # for our type-2 42 | 43 | # Details (we follow the slurm training in MMDetection); 44 | # For the detailed descriptions of these hyperparameters, please refer to the ./tools/faster_rcnn_train_val.py 45 | 46 | #!/bin/bash 47 | ROOT=../../../.. 
48 | export PYTHONPATH=$ROOT:$PYTHONPATH 49 | #-------------------------- 50 | job_name=training_4cluster 51 | ckdir=4cluster 52 | mkdir ./${ckdir}/${job_name} 53 | #-------------------------- 54 | PARTITION=$1 55 | GPUS=${5:-8} 56 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 57 | 58 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \ 59 | --ntasks-per-node=${GPUS_PER_NODE} \ 60 | --job-name=${job_name} \ 61 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \ 62 | --config=config_512.json \ 63 | --dist=1 \ 64 | --fix_num=0 \ 65 | --L1=1 \ 66 | --cluster_num=4 \ 67 | --threshold=128 \ 68 | --recon_size=256 \ 69 | --port=21603 \ 70 | --arch=vgg16_FasterRCNN \ 71 | --warmup_epochs=1 \ 72 | --lr=0.0000125 \ 73 | --step_epochs=16,22 \ 74 | --batch-size=1 \ 75 | --epochs=25 \ 76 | --dataset=cityscapes \ 77 | --train_meta_file=/path/to/train.txt \ 78 | --target_meta_file=/path/to/foggy_train.txt \ 79 | --val_meta_file=/path/to/foggy_val.txt \ 80 | --datadir=/path/to/leftImg8bit/ \ 81 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \ 82 | --results_dir=${ckdir}/${job_name}/results_dir \ 83 | --save_dir=${ckdir}/${job_name} \ 84 | 2>&1 | tee ${ckdir}/${job_name}/train.log 85 | 86 | ``` 87 | 88 | 89 | ## Evaluation 90 | We provide our pre-trained model in this [url](https://drive.google.com/open?id=1FlQePvlavZVgmzBik6IdcG_xWh0xtROz). You can download it and make a test (please modify these parameters before evaluation). 91 | 92 | We support slurm evaluation and single-gpu evaluation. 
Please check eval.sh and eval_single.sh. 93 | ```bash 94 | sh eval_single.sh # evaluation with single gpu ==> mAP:33.91 95 | ``` 96 | 97 | ## Citation 98 | If you find our work useful in your research, please consider citing our paper: 99 | ``` 100 | @inproceedings{zhu2019adapting, 101 | title={Adapting Object Detectors via Selective Cross-Domain Alignment}, 102 | author={Zhu, Xinge and Pang, Jiangmiao and Yang, Ceyuan and Shi, Jianping and Lin, Dahua}, 103 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 104 | pages={687--696}, 105 | year={2019} 106 | } 107 | ``` 108 | 109 | ## Acknowledgments 110 | We thank the open-source codebases [mmdetection](https://github.com/open-mmlab/mmdetection) and [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch). 111 | 112 | -------------------------------------------------------------------------------- /utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | #encoding: utf-8 2 | 3 | import numpy as np 4 | import warnings 5 | from extensions._cython_bbox import cython_bbox 6 | 7 | 8 | def bbox_iou_overlaps(b1, b2): 9 | return cython_bbox.bbox_overlaps(b1.astype(np.float32), b2.astype(np.float32)) 10 | ''' 11 | :argument 12 | b1,b2: [n, k], k>=4, x1,y1,x2,y2,... 13 | :returns 14 | intersection-over-union pair-wise. 
def bbox_iof_overlaps(b1, b2):
    '''
    Pair-wise intersection over the area of the former box.

    :argument
        b1,b2: [n, k], k>=4 with x1,y1,x2,y2,....
    :returns
        [n1, n2] matrix of intersection / area(b1); the denominator is
        floored at 1
    '''
    former_area = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])
    # column vectors for b1, row vectors for b2 -> broadcast to [n1, n2]
    left = np.maximum(b1[:, 0].reshape(-1, 1), b2[:, 0].reshape(1, -1))
    top = np.maximum(b1[:, 1].reshape(-1, 1), b2[:, 1].reshape(1, -1))
    right = np.minimum(b1[:, 2].reshape(-1, 1), b2[:, 2].reshape(1, -1))
    bottom = np.minimum(b1[:, 3].reshape(-1, 1), b2[:, 3].reshape(1, -1))
    span_x = np.maximum(right - left, 0)
    span_y = np.maximum(bottom - top, 0)
    overlap = span_x * span_y
    return overlap / np.maximum(former_area[:, np.newaxis], 1)


def center_to_corner(boxes):
    '''
    :argument
        boxes: [N, 4] of center_x, center_y, w, h
    :returns
        boxes: [N, 4] of xmin, ymin, xmax, ymax
    '''
    half_w = boxes[:, 2] / 2.
    half_h = boxes[:, 3] / 2.
    corners = [
        boxes[:, 0] - half_w,
        boxes[:, 1] - half_h,
        boxes[:, 0] + half_w,
        boxes[:, 1] + half_h,
    ]
    return np.vstack(corners).transpose()


def corner_to_center(boxes):
    '''
    inverse of center_to_corner: [N, 4] corners -> [N, 4] cx, cy, w, h
    '''
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    centers = [(x1 + x2) / 2., (y1 + y2) / 2., (x2 - x1), (y2 - y1)]
    return np.vstack(centers).transpose()


def compute_loc_targets(raw_bboxes, gt_bboxes):
    '''
    Encode ground-truth boxes as regression targets relative to raw boxes.

    :argument
        raw_bboxes, gt_bboxes:[N, k] first dim must be equal
    :returns
        loc_targets:[N, 4]
    '''
    anchor = corner_to_center(raw_bboxes)  # cx, cy, w, h
    target = corner_to_center(gt_bboxes)
    assert (np.all(anchor[:, 2] > 0))
    assert (np.all(anchor[:, 3] > 0))
    offset_x = (target[:, 0] - anchor[:, 0]) / anchor[:, 2]
    offset_y = (target[:, 1] - anchor[:, 1]) / anchor[:, 3]
    log_w = np.log(target[:, 2] / anchor[:, 2])
    log_h = np.log(target[:, 3] / anchor[:, 3])
    return np.vstack([offset_x, offset_y, log_w, log_h]).transpose()


def compute_loc_bboxes(raw_bboxes, deltas):
    '''
    Apply regression deltas to raw boxes (inverse of compute_loc_targets).

    :argument
        raw_bboxes, delta:[N, k] first dim must be equal
    :returns
        bboxes:[N, 4]
    '''
    # Warnings raised while decoding are recorded instead of being emitted.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        anchor = corner_to_center(raw_bboxes)  # cx, cy, w, h
        new_cx = deltas[:, 0] * anchor[:, 2] + anchor[:, 0]
        new_cy = deltas[:, 1] * anchor[:, 3] + anchor[:, 1]
        new_w = np.exp(deltas[:, 2]) * anchor[:, 2]
        new_h = np.exp(deltas[:, 3]) * anchor[:, 3]
        decoded = np.vstack([new_cx, new_cy, new_w, new_h]).transpose()
        return center_to_corner(decoded)


def clip_bbox(bbox, img_size):
    '''
    Clip x1,y1,x2,y2 (columns 0..3) of bbox in place to the image whose
    (height, width) are the first two entries of img_size; returns bbox.
    '''
    h, w = img_size[:2]
    # upper bound per column: x columns use the width, y columns the height
    for col, upper in enumerate((w - 1, h - 1, w - 1, h - 1)):
        bbox[:, col] = np.clip(bbox[:, col], 0, upper)
    return bbox
117 | ov = bbox_iou_overlaps(box_gt, box_pred) 118 | max_ov = np.max(ov, axis=1) 119 | idx = np.where(max_ov>0.5)[0] 120 | n_rc = idx.size 121 | return n_rc, n_gt 122 | 123 | -------------------------------------------------------------------------------- /datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | #import pycocotools._mask as _mask 4 | from . import _mask 5 | 6 | # Interface for manipulating masks stored in RLE format. 7 | # 8 | # RLE is a simple yet efficient format for storing binary masks. RLE 9 | # first divides a vector (or vectorized image) into a series of piecewise 10 | # constant regions and then for each piece simply stores the length of 11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 13 | # (note that the odd counts are always the numbers of zeros). Instead of 14 | # storing the counts directly, additional compression is achieved with a 15 | # variable bitrate representation based on a common scheme called LEB128. 16 | # 17 | # Compression is greatest given large piecewise constant regions. 18 | # Specifically, the size of the RLE is proportional to the number of 19 | # *boundaries* in M (or for an image the number of boundaries in the y 20 | # direction). Assuming fairly simple shapes, the RLE representation is 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 22 | # is substantially lower, especially for large simple objects (large n). 23 | # 24 | # Many common operations on masks can be computed directly using the RLE 25 | # (without need for decoding). This includes computations such as area, 26 | # union, intersection, etc. All of these operations are linear in the 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 28 | # of the object. 
Computing these operations on the original mask is O(n). 29 | # Thus, using the RLE can result in substantial computational savings. 30 | # 31 | # The following API functions are defined: 32 | # encode - Encode binary masks using RLE. 33 | # decode - Decode binary masks encoded via RLE. 34 | # merge - Compute union or intersection of encoded masks. 35 | # iou - Compute intersection over union between masks. 36 | # area - Compute area of encoded masks. 37 | # toBbox - Get bounding boxes surrounding encoded masks. 38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 54 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 56 | # dt,gt - May be either bounding boxes or encoded masks 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 58 | # 59 | # Finally, a note about the intersection over union (iou) computation. 60 | # The standard iou of a ground truth (gt) and detected (dt) object is 61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 62 | # For "crowd" regions, we use a modified criteria. If a gt object is 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using 65 | # gt'=intersect(dt,gt). 
def encode(bimask):
    """Run-length encode a binary mask.

    A 3-D ``[h, w, n]`` array is encoded as a list of RLEs; a 2-D ``[h, w]``
    array is encoded as a single RLE.

    Raises:
        ValueError: if ``bimask`` is neither 2-D nor 3-D (the original
            silently returned ``None`` in that case).
    """
    ndim = len(bimask.shape)
    if ndim == 3:
        return _mask.encode(bimask)
    if ndim == 2:
        h, w = bimask.shape
        # the C API expects [h, w, n] in column-major order
        return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
    raise ValueError(
        'encode expects a 2-D or 3-D mask, got %d dimension(s)' % ndim)


def decode(rleObjs):
    """Decode one RLE (-> ``[h, w]``) or a list of RLEs (-> ``[h, w, n]``)."""
    if isinstance(rleObjs, list):
        return _mask.decode(rleObjs)
    return _mask.decode([rleObjs])[:, :, 0]


def area(rleObjs):
    """Return the area of one RLE, or the areas of a list of RLEs."""
    if isinstance(rleObjs, list):
        return _mask.area(rleObjs)
    return _mask.area([rleObjs])[0]


def toBbox(rleObjs):
    """Return the bounding box of one RLE, or the boxes of a list of RLEs."""
    if isinstance(rleObjs, list):
        return _mask.toBbox(rleObjs)
    return _mask.toBbox([rleObjs])[0]
const float alpha, 16 | const int num_classes, float* losses) { 17 | CUDA_1D_KERNEL_LOOP(i, N) { 18 | int d = i % num_classes; //current class 19 | int tmp = i / num_classes; //targets index 20 | int t = targets[tmp]; 21 | 22 | // check whether the class is true class or not. 23 | // The target classes are in range 1 - 81 and the d is in range 0-80 24 | // because we predict A*80 dim, so for comparison purpose, compare t and (d+1) 25 | float c1 = (t == (d + 1)); 26 | float c2 = (t != -1 & t != (d + 1)); 27 | 28 | float Np = max(weight_pos, 1.0); 29 | float zn = (1.0 - alpha) / Np; 30 | float zp = alpha / Np; 31 | 32 | // p = 1. / 1. + expf(-x) 33 | float p = 1. / (1. + expf(-logits[i])); 34 | 35 | // (1 - p)**gamma * log(p) where 36 | float term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 37 | // p**gamma * log(1 - p) 38 | float term2 = 39 | powf(p, gamma) * 40 | (-1. * logits[i] * (logits[i] >= 0) - 41 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 42 | 43 | losses[i] = 0.0; 44 | losses[i] += -c1 * term1 * zp; 45 | losses[i] += -c2 * term2 * zn; 46 | } 47 | } 48 | 49 | __global__ void SigmoidFocalLossGradientKernel( 50 | const int N, const float* logits, 51 | const int* targets, float* dX_data, const float weight_pos, 52 | const float gamma, const float alpha, const int num_classes) { 53 | CUDA_1D_KERNEL_LOOP(i, N) { 54 | int d = i % num_classes; //current class 55 | int tmp = i / num_classes; //targets index 56 | int t = targets[tmp]; 57 | 58 | float Np = max(weight_pos, 1.0); 59 | float zn = (1.0 - alpha) / Np; 60 | float zp = alpha / Np; 61 | //int t = targets[n * (H * W * A) + a * (H * W) + y * W + x]; 62 | 63 | float c1 = (t == (d + 1)); 64 | float c2 = (t != -1 & t != (d + 1)); 65 | float p = 1. / (1. + expf(-logits[i])); 66 | 67 | // (1-p)**g * (1 - p - g*p*log(p)) 68 | float term1 = 69 | powf((1. - p), gamma) * 70 | (1. 
// Host-side launcher for SigmoidFocalLossKernel.
//   N           total number of logits
//   logits      device pointer, N floats
//   targets     device pointer, class index per row
//   losses      device pointer, N floats, written by the kernel
//   stream      CUDA stream the kernel is enqueued on
// Returns 1 on success; any CUDA error is printed and the process exits.
int SigmoidFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses, cudaStream_t stream){

    const int kThreadsPerBlock = 1024;
    cudaError_t err;

    // Surface any error left pending by earlier CUDA calls before launching.
    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    // An empty input would give a grid of 0 blocks, which is an invalid
    // launch configuration and would kill the process below. There is
    // nothing to compute, so report success instead.
    if (N <= 0) {
        return 1;
    }

    const int num_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;
    SigmoidFocalLossKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, targets, weight_pos, gamma, alpha, num_classes, losses);

    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
def to_np_array(x):
    """Return *x* as a numpy array.

    ``None`` is passed through unchanged. Tensors are detached from the
    autograd graph and moved to the CPU first; anything else goes through
    ``np.array``.
    """
    if x is None:
        return None
    if torch.is_tensor(x):
        # .detach() is the supported replacement for the legacy .data access
        return x.detach().cpu().numpy()
    return np.array(x)
batch, num_anchors * 4, height, width 25 | ground_truth_bboxes: FloatTensor, [batch, max_num_gt_bboxes, 5] 26 | image_info: FloatTensor, [batch, 3] 27 | ignore_regions: FloatTensor, [batch, max_num_ignore_regions, 4] 28 | :returns 29 | cls_targets: Variable, [batch, num_anchors * 1, height, width] 30 | loc_targets, loc_masks: Variable, [batch, num_anchors * 4, height, width] 31 | ''' 32 | cuda_device = ground_truth_bboxes.device 33 | ground_truth_bboxes, image_info, ignore_regions = \ 34 | map(to_np_array, [ground_truth_bboxes, image_info, ignore_regions]) 35 | 36 | batch_size, num_anchors_4, featmap_h, featmap_w = feature_size 37 | num_anchors = num_anchors_4 // 4 38 | assert(num_anchors * 4 == num_anchors_4) 39 | # [K*A, 4] 40 | anchors_overplane = anchor_helper.get_anchors_over_plane( 41 | featmap_h, featmap_w, cfg['anchor_ratios'], cfg['anchor_scales'], 42 | cfg['anchor_stride']) 43 | 44 | B = batch_size 45 | A = num_anchors 46 | K = featmap_h * featmap_w 47 | G = ground_truth_bboxes.shape[1] 48 | 49 | # compute overlaps between anchors and gt_bboxes within each batch 50 | # shape: [B, K*A, G] 51 | overlaps = np.stack([bbox_helper.bbox_iou_overlaps(anchors_overplane, 52 | ground_truth_bboxes[ix]) for ix in range(B)], axis = 0) 53 | 54 | # shape of [B, K*A] 55 | argmax_overlaps = overlaps.argmax(axis = 2) 56 | max_overlaps = overlaps.max(axis = 2) 57 | 58 | # [B, G] 59 | gt_max_overlaps = overlaps.max(axis=1) 60 | # ignore thoese gt_max_overlap too small 61 | gt_max_overlaps[gt_max_overlaps < 0.1] = -1 62 | gt_argmax_b_ix, gt_argmax_ka_ix, gt_argmax_g_ix = \ 63 | np.where(overlaps == gt_max_overlaps[:, np.newaxis, :]) 64 | # match each anchor to the ground truth bbox 65 | argmax_overlaps[gt_argmax_b_ix, gt_argmax_ka_ix] = gt_argmax_g_ix 66 | 67 | labels = np.empty([B, K*A], dtype=np.int64) 68 | labels.fill(-1) 69 | labels[max_overlaps < cfg['negative_iou_thresh']] = 0 70 | 71 | # remove negatives located in ignore regions 72 | if ignore_regions is not None: 73 | 
iof_overlaps = np.stack([bbox_helper.bbox_iof_overlaps 74 | (anchors_overplane, ignore_regions[ix]) for ix in range(B)], axis=0) 75 | max_iof_overlaps = iof_overlaps.max(axis=2) # [B, K*A] 76 | labels[max_iof_overlaps > cfg['ignore_iou_thresh']] = -1 77 | 78 | labels[gt_argmax_b_ix, gt_argmax_ka_ix] = 1 79 | labels[max_overlaps > cfg['positive_iou_thresh']] = 1 80 | 81 | # sampling 82 | num_pos_sampling = int(cfg['positive_percent'] * cfg['rpn_batch_size'] * batch_size) 83 | pos_b_ix, pos_ka_ix = np.where(labels > 0) 84 | num_positives = len(pos_b_ix) 85 | if num_positives > num_pos_sampling: 86 | remove_ix = np.random.choice(num_positives, size = num_positives - num_pos_sampling, replace = False) 87 | labels[pos_b_ix[remove_ix], pos_ka_ix[remove_ix]] = -1 88 | num_positives = num_pos_sampling 89 | num_neg_sampling = cfg['rpn_batch_size'] * batch_size - num_positives 90 | neg_b_ix, neg_ka_ix = np.where(labels == 0) 91 | num_negatives = len(neg_b_ix) 92 | if num_negatives > num_neg_sampling: 93 | remove_ix = np.random.choice(num_negatives, size = num_negatives - num_neg_sampling, replace = False) 94 | labels[neg_b_ix[remove_ix], neg_ka_ix[remove_ix]] = -1 95 | 96 | pos_b_ix, pos_ka_ix = np.where(labels > 0) 97 | pos_anchors = anchors_overplane[pos_ka_ix, :] 98 | 99 | pos_target_ix = argmax_overlaps[pos_b_ix, pos_ka_ix] 100 | pos_target_gt = ground_truth_bboxes[pos_b_ix, pos_target_ix] 101 | pos_loc_targets = bbox_helper.compute_loc_targets(pos_anchors, pos_target_gt) 102 | 103 | loc_targets = np.zeros([B, K*A, 4], dtype = np.float32) 104 | loc_targets[pos_b_ix, pos_ka_ix, :] = pos_loc_targets 105 | # loc_weights = np.zeros([B, K*A, 4]) 106 | loc_masks = np.zeros([B, K*A, 4], dtype = np.float32) 107 | loc_masks[pos_b_ix, pos_ka_ix, :] = 1. 
// Ceiling division. The original divided (m) by (m), which always yields 1.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Each thread starts at its global index and advances by the total number
// of launched threads until the bound is reached. The bound is parenthesised
// so that expression arguments (e.g. N / num_classes) expand safely.
#define CUDA_1D_KERNEL_LOOP(i, n)                              \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)

// Row-wise softmax: Xdata is read as (N / num_classes) consecutive rows of
// num_classes logits, and the normalised probabilities are written to the
// same positions in Pdata.
__global__ void SpatialSoftmaxKernel(const int N, const float* Xdata, float* Pdata,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, N / num_classes) {
    const int base = index * num_classes; // first element of this row

    // Subtract the row maximum before exponentiating for numerical reasons
    float max_val = -FLT_MAX;
    for (int c = 0; c < num_classes; ++c) {
      max_val = max(max_val, Xdata[base + c]);
    }
    // Exponentiate and accumulate the normaliser
    float expsum = 0.0f;
    for (int c = 0; c < num_classes; ++c) {
      const float expx = expf(Xdata[base + c] - max_val);
      Pdata[base + c] = expx;
      expsum += expx;
    }
    // Normalize
    for (int c = 0; c < num_classes; ++c) {
      Pdata[base + c] /= expsum;
    }
  }
}
float* losses, 39 | const float weight_pos, const float gamma, const float alpha, 40 | const int num_classes) { 41 | CUDA_1D_KERNEL_LOOP(i, N / num_classes) { 42 | 43 | int base = i * num_classes; 44 | const int label = static_cast(targets[i]); 45 | 46 | float Np = max(weight_pos, 1.0); 47 | float z = (label == 0) * (1 - alpha) / Np + 48 | (label >= 1) * alpha / Np; 49 | 50 | losses[i] = 0.0; 51 | if (label >= 0) { 52 | losses[i] = 53 | -(powf(1.0 - Pdata[base + label], gamma) * 54 | log(max(Pdata[base + label], FLT_MIN))) * z; 55 | } 56 | } 57 | } 58 | 59 | __global__ void SoftmaxFocalLossGradientWeightKernel( 60 | const int N, 61 | const float* Pdata, const int* targets, float* buff, 62 | const float weight_pos, const float gamma, const float alpha, 63 | const int num_classes) { 64 | CUDA_1D_KERNEL_LOOP(i, N / num_classes) { 65 | 66 | int base = i * num_classes; 67 | const int label = static_cast(targets[i]); 68 | float Np = max(weight_pos, 1.0); 69 | float z = (label == 0) * (1 - alpha) / Np + 70 | (label >= 1) * alpha / Np; 71 | 72 | buff[i] = 0.0; 73 | if (label >= 0) { 74 | float onemp = 1. 
// Host-side launcher for the softmax focal loss forward pass.
// Runs SpatialSoftmaxKernel (logits -> priors) followed by
// SoftmaxFocalLossKernel (priors, targets -> losses) on `stream`.
//   N            total number of logits (rows * num_classes)
//   num_classes  number of logits per row
// Returns 1 on success; any CUDA error is printed and the process exits.
int SoftmaxFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses,
    float* priors, cudaStream_t stream){

    const int kThreadsPerBlock = 1024;
    cudaError_t err;

    // Surface any error left pending by earlier CUDA calls before launching.
    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    // Empty input: nothing to compute.
    if (N <= 0) {
        return 1;
    }
    // The grid size divides by num_classes on the host; reject a
    // non-positive value explicitly instead of faulting on the division.
    if (num_classes <= 0)
    {
        fprintf( stderr, "%s#%d: invalid num_classes : %d\n", __FILE__,
                __LINE__, num_classes );
        exit( -1 );
    }
    // Both kernels iterate over rows, one thread per row.
    const int num_rows = N / num_classes;
    if (num_rows == 0) {
        // fewer logits than one full row: a 0-block grid is not launchable
        return 1;
    }
    const int num_blocks = (num_rows + kThreadsPerBlock - 1) / kThreadsPerBlock;

    SpatialSoftmaxKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, priors, num_classes);

    SoftmaxFocalLossKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, priors, targets, losses, weight_pos, gamma, alpha, num_classes);

    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
def to_np_array(x):
    """Return *x* as a numpy array; ``None`` is passed through unchanged."""
    if x is None:
        return None
    if isinstance(x, Variable):
        # unwrap autograd Variables so that .numpy() is permitted
        x = x.data
    if torch.is_tensor(x):
        return x.cpu().numpy()
    return np.array(x)


class COCODataLoader(torch.utils.data.DataLoader):
    """DataLoader whose collate step zero-pads all samples of a batch to a common size."""

    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
                 num_workers=0, pin_memory=False, drop_last=False):
        # Keyword arguments keep this call correct even if the positional
        # order of DataLoader's parameters differs between torch releases.
        super(COCODataLoader, self).__init__(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            sampler=sampler,
            batch_sampler=batch_sampler,
            num_workers=num_workers,
            collate_fn=self._collate_fn,
            pin_memory=pin_memory,
            drop_last=drop_last)

    def _collate_fn(self, batch):
        '''
        Pad and stack a list of dataset samples.

        Each sample is indexed as (image, image_info, gt_bboxes,
        ignore_regions, keypoints_or_None, masks_or_None, filename).

        Return: a mini-batch of data:
            image_data: FloatTensor of image, with shape of [b, 3, max_h, max_w]
            image_info: np.array stacked from the per-sample image_info
            bboxes: np.array of shape [b, max_num_gts, 5]
            ignore regions: np.array of shape [b, max_num_igs, 4]
            keypoints: np.array of shape [b, max_num_gts, k, 2] (empty array if absent)
            masks: np.array of shape [b, max_num_gts, max_h, max_w] (empty array if absent)
            filename: sequence of str
        '''
        batch_size = len(batch)

        zip_batch = list(zip(*batch))
        images = zip_batch[0]
        unpad_image_sizes = zip_batch[1]
        ground_truth_bboxes = zip_batch[2]
        ignore_regions = zip_batch[3]
        ground_truth_keypoints = zip_batch[4]
        ground_truth_masks = zip_batch[5]
        filenames = zip_batch[6]
        # presence of optional annotations is decided from the first sample
        has_keyp = ground_truth_keypoints[0] is not None
        has_mask = ground_truth_masks[0] is not None

        # common canvas: largest image, rounded up to a multiple of 128
        max_img_h = max(img.shape[-2] for img in images)
        max_img_w = max(img.shape[-1] for img in images)
        max_img_h = int(np.ceil(max_img_h / 128.0) * 128)
        max_img_w = int(np.ceil(max_img_w / 128.0) * 128)

        max_num_gt_bboxes = max(b.shape[0] for b in ground_truth_bboxes)
        max_num_ig_bboxes = max(b.shape[0] for b in ignore_regions)
        assert(max_num_gt_bboxes > 0)
        assert(max_num_ig_bboxes > 0)

        padded_images = []
        padded_gt_bboxes = []
        padded_ig_bboxes = []
        padded_gt_keypoints = [] if has_keyp else None
        padded_gt_masks = [] if has_mask else None
        for b_ix in range(batch_size):
            img = images[b_ix]

            # pad zeros to right bottom of each image
            pad_size = (0, max_img_w - img.shape[-1], 0, max_img_h - img.shape[-2])
            padded_images.append(F.pad(img, pad_size, 'constant', 0).data.cpu())

            # pad zeros to gt_bboxes
            gt_bboxes = to_np_array(ground_truth_bboxes[b_ix])
            new_gt_bboxes = np.zeros([max_num_gt_bboxes, gt_bboxes.shape[-1]])
            new_gt_bboxes[:gt_bboxes.shape[0], :] = gt_bboxes
            padded_gt_bboxes.append(new_gt_bboxes)

            # pad zeros to ig_bboxes
            ig_bboxes = to_np_array(ignore_regions[b_ix])
            new_ig_bboxes = np.zeros([max_num_ig_bboxes, ig_bboxes.shape[-1]])
            new_ig_bboxes[:ig_bboxes.shape[0], :] = ig_bboxes
            padded_ig_bboxes.append(new_ig_bboxes)

            # pad zero to keypoints
            if has_keyp:
                keypoints = to_np_array(ground_truth_keypoints[b_ix])
                shape = keypoints.shape
                new_keypoints = np.zeros([max_num_gt_bboxes, shape[1], shape[2]])
                new_keypoints[:shape[0], ...] = keypoints
                padded_gt_keypoints.append(new_keypoints)

            # pad zeros to masks
            if has_mask:
                # [n, img_h, img_w] -> [n, max_img_h, max_img_w]
                masks = torch.from_numpy(ground_truth_masks[b_ix])
                masks = F.pad(Variable(masks), pad_size, 'constant', 0).data.cpu()
                # [n, max_img_h, max_img_w] -> [max_num_gt_bboxes, max_img_h, max_img_w]
                if masks.shape[0] < max_num_gt_bboxes:
                    pad_masks = masks.new(max_num_gt_bboxes - masks.shape[0], max_img_h, max_img_w).zero_()
                    masks = torch.cat([masks, pad_masks], dim=0)
                padded_gt_masks.append(masks.numpy())

        def stack_fn(items):
            # None / empty list (annotation type absent) becomes an empty array
            return np.stack(items, axis=0) if items else np.array([])

        padded_images = torch.cat(padded_images, dim=0)
        unpad_image_sizes = np.stack(unpad_image_sizes, axis=0)
        padded_gt_bboxes = stack_fn(padded_gt_bboxes)
        padded_ig_bboxes = stack_fn(padded_ig_bboxes)
        padded_gt_keypoints = stack_fn(padded_gt_keypoints)
        padded_gt_masks = stack_fn(padded_gt_masks)

        return [padded_images,
                unpad_image_sizes,
                padded_gt_bboxes,
                padded_ig_bboxes,
                padded_gt_keypoints,
                padded_gt_masks,
                filenames]


def validate(anno_file):
    """Print how many images have annotations, and how many have annotated keypoints.

    Annotations without a ``num_keypoints`` entry are counted as having no
    keypoints instead of raising ``KeyError``.
    """
    from pycocotools.coco import COCO
    coco = COCO(anno_file)
    image_a = set()
    image_b = set()
    for anno in coco.anns.values():
        image_a.add(anno['image_id'])
        if anno.get('num_keypoints', 0) > 0:
            image_b.add(anno['image_id'])
    print('total images of person :{}\n'.format(len(image_a)))
    print('images with annotated keypoints:{}\n'.format(len(image_b)))
self.targets = targets 45 | self.weight_pos = weight_pos 46 | return torch.cuda.FloatTensor([losses.sum()]) 47 | 48 | def backward(self, grad_output): 49 | # grad_output: 1.0 / num_of_gpus 50 | preds_size = self.preds.size() 51 | grad_input = self.preds.new(preds_size[0], preds_size[1]).zero_() 52 | N = preds_size[0] * preds_size[1] 53 | 54 | assert(self.preds.is_contiguous()) 55 | assert(self.targets.is_contiguous()) 56 | assert(grad_input.is_contiguous()) 57 | 58 | assert(self.preds.is_cuda and self.targets.is_cuda and grad_input.is_cuda) 59 | focal_loss.focal_loss_sigmoid_backward_cuda(N, 60 | self.preds, 61 | self.targets, 62 | grad_input, 63 | self.weight_pos, 64 | self.gamma, 65 | self.alpha, 66 | self.num_classes) 67 | grad_input = grad_input * grad_output 68 | return grad_input, None, None 69 | 70 | class SoftmaxFocalLossFunction(Function): 71 | def __init__(self, gamma, alpha, num_classes): 72 | self.gamma = gamma 73 | self.alpha = alpha 74 | self.num_classes = num_classes 75 | 76 | self.weight_pos = None 77 | self.preds = None 78 | self.targets = None 79 | 80 | def forward(self, preds, targets, weight_pos): 81 | # preds shape: [Batch * h * w * num_anchors, num_classes] 82 | # targets shape: [Batch * h * w * num_anchors] 83 | preds_size = preds.size() 84 | targets_size = targets.size() 85 | 86 | assert(preds_size[0] == targets_size[0]) 87 | assert(preds_size[1] == self.num_classes) 88 | 89 | losses = preds.new(preds_size[0]).zero_() 90 | priors = preds.new(preds_size[0], preds_size[1]).zero_() 91 | 92 | weight_pos = float(weight_pos[0]) 93 | N = preds_size[0] * preds_size[1] 94 | 95 | 96 | assert(losses.is_contiguous()) 97 | assert(preds.is_contiguous()) 98 | assert(targets.is_contiguous()) 99 | assert(priors.is_contiguous()) 100 | 101 | assert(preds.is_cuda and targets.is_cuda) 102 | focal_loss.focal_loss_softmax_forward_cuda(N, 103 | preds, 104 | targets, 105 | weight_pos, 106 | self.gamma, 107 | self.alpha, 108 | self.num_classes, 109 | losses, 110 | 
def pil_loader(img_str):
    """Decode an in-memory encoded image and return it as an RGB PIL image.

    ``img_str`` is the raw encoded file content (bytes). The original wrote
    it into a text ``StringIO``, which cannot hold bytes on Python 3; a
    ``BytesIO`` buffer is what ``Image.open`` needs.
    """
    # local import: the file's header only imports StringIO from io
    from io import BytesIO
    with Image.open(BytesIO(img_str)) as img:
        return img.convert('RGB')
self.normalize_fn = normalize_fn 26 | # self.memcached = memcached 27 | 28 | #self.logger.info("building dataset from %s" % list_file) 29 | save_name = 'meta_%s'%(list_file.split('.')[0].strip('/').replace('/', '_')) 30 | ## load annotations if exist 31 | if os.path.exists(save_name): 32 | with open(save_name, 'rb') as f: 33 | self.metas = pk.load(f) 34 | self.num = len(self.metas) 35 | # aspect ratio of images for sampler sort 36 | self.aspect_ratios = [float(m[1])/m[2] for m in self.metas] 37 | return 38 | ## otherwise parse annotations 39 | with open(list_file) as f: 40 | lines = f.readlines() 41 | self.metas = [] 42 | count = 0 43 | i = 0 44 | while i < len(lines): 45 | img_ig = [] 46 | img_gt = [] 47 | labels = [] 48 | img_name = lines[i + 1].rstrip() 49 | img_height = float(lines[i + 3]) 50 | img_width = float(lines[i + 4]) 51 | img_ig_size = int (lines[i + 6]) 52 | i += 7 53 | for j in range(img_ig_size): 54 | sp = lines[i + j].split() 55 | img_ig.append([float(sp[0]), float(sp[1]), float(sp[2]), float(sp[3])]) 56 | if len(img_ig) == 0: 57 | img_ig.append([0,0,0,0]) 58 | i += img_ig_size 59 | img_gt_size = int(lines[i]) 60 | i += 1 61 | for j in range(img_gt_size): 62 | sp = lines[i + j].split() 63 | img_gt.append([float(sp[1]),float(sp[2]),float(sp[3]),float(sp[4])]) 64 | labels.append(int(sp[0])) 65 | i += img_gt_size 66 | count += 1 67 | #if count % 100 == 0: 68 | # self.logger.info(count) 69 | self.metas.append([img_name, img_height, img_width, np.array(img_gt), np.array(labels), np.array(img_ig)]) 70 | with open(save_name, 'wb') as f: 71 | pk.dump(self.metas, f) 72 | #self.logger.info("read meta done") 73 | self.num = len(self.metas) 74 | # aspect ratio of images for sampler sort 75 | self.aspect_ratios = [float(m[1])/m[2] for m in self.metas] 76 | 77 | def __len__(self): 78 | return self.num 79 | 80 | def __getitem__(self, idx): 81 | filename = os.path.join(self.root_dir, self.metas[idx][0]) 82 | h, w, bbox, labels, ignores = self.metas[idx][1:] 83 | 
class ExampleTransform(object):
    """Random-scale resize (plus optional horizontal flip) for an image and its boxes.

    sizes: a single target length for the short side, or a list/tuple of
        lengths; the target is drawn uniformly from [min(sizes), max(sizes)].
    max_size: upper bound for the long side after resizing.
    flip: when True, flip image and boxes left-right with probability 0.5.
    """

    def __init__(self, sizes, max_size, flip=False):
        # accept tuples as well as lists; a scalar becomes a one-element list
        if not isinstance(sizes, (list, tuple)):
            sizes = [sizes]
        self.scale_min = min(sizes)
        self.scale_max = max(sizes)
        self.max_size = max_size
        self.flip = flip

    @staticmethod
    def _rescale(boxes, scale):
        """Return a scaled copy of [N, >=4] boxes: x1/y1 floored, x2/y2 ceiled."""
        scaled = np.array(boxes)
        if scaled.size == 0 and scaled.ndim != 2:
            # "no boxes" given as a flat empty array: normalise to [0, 4] so
            # that column indexing here and in callers keeps working
            return scaled.reshape(0, 4)
        scaled[:, 0] = np.floor(scaled[:, 0] * scale)
        scaled[:, 1] = np.floor(scaled[:, 1] * scale)
        scaled[:, 2] = np.ceil(scaled[:, 2] * scale)
        scaled[:, 3] = np.ceil(scaled[:, 3] * scale)
        return scaled

    @staticmethod
    def _flip_x(boxes, width):
        """Mirror x1/x2 of ``boxes`` around an image of ``width``, in place."""
        # compute both columns before writing so neither read sees new data
        flipped_x1 = width - boxes[:, 2]
        flipped_x2 = width - boxes[:, 0]
        boxes[:, 0] = flipped_x1
        boxes[:, 2] = flipped_x2

    def __call__(self, img, bbox, ignores):
        """Resize ``img`` and rescale ``bbox`` / ``ignores`` accordingly.

        Returns (new_img, new_bbox, scale, new_ignores); the input arrays
        are not modified.
        """
        w, h = img.size
        short = min(w, h)
        large = max(w, h)

        # scale the short side to a random target, capped by the long side limit
        size = np.random.randint(self.scale_min, self.scale_max + 1)
        scale = min(size / short, self.max_size / large)
        new_w, new_h = int(w * scale), int(h * scale)

        new_img = img.resize((new_w, new_h))
        new_bbox = self._rescale(bbox, scale)
        new_ignores = self._rescale(ignores, scale)

        if self.flip:
            if np.random.random() < 0.5:
                new_img = new_img.transpose(Image.FLIP_LEFT_RIGHT)
                self._flip_x(new_bbox, new_w)
                if new_ignores.shape[0] > 0:
                    self._flip_x(new_ignores, new_w)
        return new_img, new_bbox, scale, new_ignores
def parse_res(res_list):
    """Parse detection result lines into per-class lists.

    Each non-blank line is whitespace separated:
    ``img_name x1 y1 x2 y2 score label`` (``img_name`` has no extension).
    Coordinates are truncated to integers.

    Args:
        res_list: iterable of result lines.

    Returns:
        defaultdict(list) mapping ``label`` to
        ``[[x1, y1, x2, y2, score, img_name], ...]``.
    """
    logger.info('Start parsing results list......')
    results = defaultdict(list)
    for line in res_list:
        fields = line.split()
        if not fields:
            # Blank lines (e.g. a trailing newline in a concatenated results
            # file) carry no detection; skip them instead of raising.
            continue
        img_name = fields[0]
        x1, y1, x2, y2 = (int(float(value)) for value in fields[1:5])
        score = float(fields[5])
        label = int(fields[6])
        results[label].append([x1, y1, x2, y2, score, img_name])
    logger.info('Done!')
    return results
def cal_mAP(gts, results, num_classes, overlap_thre):
    """Compute per-class average precision and maximum recall.

    Detections of every foreground class (1 .. num_classes - 1) are visited
    in descending score order and matched to the ground-truth box with the
    highest IoU in the same image. A ground-truth box can be claimed only
    once; any later detection hitting it counts as a false positive.

    Args:
        gts: mapping as built by ``parse_gts``. ``gts['num'][c]`` is the
            number of ground-truth boxes of class ``c``; ``gts[img]['bbox'][c]``
            and ``gts[img]['is_det'][c]`` hold the boxes and their "already
            matched" flags. The flags are updated in place.
        results: mapping ``label -> [[x1, y1, x2, y2, score, img_name], ...]``.
        num_classes: number of classes including background (index 0).
        overlap_thre: minimum IoU for a detection to count as a match.

    Returns:
        ``(ap, max_recall)``: two arrays of length ``num_classes``; index 0
        is left at 0. Classes without detections or without ground truth get
        0 instead of raising or producing NaN.
    """
    ap = np.zeros(num_classes)
    max_recall = np.zeros(num_classes)
    for class_i in range(1, num_classes):
        results_i = sorted(results[class_i], key=lambda xx: xx[4], reverse=True)
        res_num = len(results_i)
        tp = np.zeros(res_num)
        fp = np.zeros(res_num)
        sum_gt = gts['num'][class_i]
        logger.info('sum_gt: {}'.format(sum_gt))

        for k, res in enumerate(results_i):
            # .get() avoids creating (and then crashing on) an empty entry
            # for an image that has no ground-truth record.
            gt_entry = gts.get(res[-1])
            matched = False
            if gt_entry:
                overmax, is_which = calIoU(res, gt_entry['bbox'][int(class_i)])
                is_det = gt_entry['is_det'][class_i]
                if overmax >= overlap_thre and is_det[is_which] == 0:
                    is_det[is_which] = 1
                    matched = True
            if matched:
                tp[k] = 1
            else:
                fp[k] = 1

        if res_num:
            tp = np.cumsum(tp)
            fp = np.cumsum(fp)
            # tp + fp equals the 1-based rank, so precision never divides by 0.
            prec = tp / (tp + fp)
            rec = tp / sum_gt if sum_gt > 0 else np.zeros(res_num)
            # Make precision monotonically non-increasing (right-to-left max).
            prec = np.maximum.accumulate(prec[::-1])[::-1]
            # Area under the precision/recall staircase.
            ap[class_i] = np.sum(np.diff(np.concatenate(([0.0], rec))) * prec)
            max_recall[class_i] = rec.max()
        logger.info('class {} --- ap: {} max recall: {}'.format(class_i, ap[class_i], max_recall[class_i]))
    return ap, max_recall
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh):
    """Greedy non-maximum suppression.

    Args:
        dets: float32 array with one row per box, columns
            ``x1, y1, x2, y2, score``.
        thresh: a box is suppressed when its IoU with an already kept,
            higher-scoring box is ``>= thresh``.

    Returns:
        1-D array with the row indices of the kept boxes, in ascending index
        order (not score order).
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # Areas use the inclusive-pixel (+1) convention.
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int_ (not the removed alias np.int) matches the np.int_t buffer type.
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    with nogil:
        for _i in range(ndets):
            i = order[_i]
            if suppressed[i] == 1:
                continue
            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]
            for _j in range(_i + 1, ndets):
                j = order[_j]
                if suppressed[j] == 1:
                    continue
                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0.0, xx2 - xx1 + 1)
                h = max(0.0, yy2 - yy1 + 1)
                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= thresh:
                    suppressed[j] = 1

    return np.where(suppressed == 0)[0]
115 | for i in range(N): 116 | maxscore = boxes[i, 4] 117 | maxpos = i 118 | 119 | tx1 = boxes[i,0] 120 | ty1 = boxes[i,1] 121 | tx2 = boxes[i,2] 122 | ty2 = boxes[i,3] 123 | ts = boxes[i,4] 124 | ti = inds[i] 125 | 126 | pos = i + 1 127 | # get max box 128 | while pos < N: 129 | if maxscore < boxes[pos, 4]: 130 | maxscore = boxes[pos, 4] 131 | maxpos = pos 132 | pos = pos + 1 133 | 134 | # add max box as a detection 135 | boxes[i,0] = boxes[maxpos,0] 136 | boxes[i,1] = boxes[maxpos,1] 137 | boxes[i,2] = boxes[maxpos,2] 138 | boxes[i,3] = boxes[maxpos,3] 139 | boxes[i,4] = boxes[maxpos,4] 140 | inds[i] = inds[maxpos] 141 | 142 | # swap ith box with position of max box 143 | boxes[maxpos,0] = tx1 144 | boxes[maxpos,1] = ty1 145 | boxes[maxpos,2] = tx2 146 | boxes[maxpos,3] = ty2 147 | boxes[maxpos,4] = ts 148 | inds[maxpos] = ti 149 | 150 | tx1 = boxes[i,0] 151 | ty1 = boxes[i,1] 152 | tx2 = boxes[i,2] 153 | ty2 = boxes[i,3] 154 | ts = boxes[i,4] 155 | 156 | pos = i + 1 157 | # NMS iterations, note that N changes if detection boxes fall below 158 | # threshold 159 | while pos < N: 160 | x1 = boxes[pos, 0] 161 | y1 = boxes[pos, 1] 162 | x2 = boxes[pos, 2] 163 | y2 = boxes[pos, 3] 164 | s = boxes[pos, 4] 165 | 166 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 167 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 168 | if iw > 0: 169 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 170 | if ih > 0: 171 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 172 | ov = iw * ih / ua #iou between max box and detection box 173 | 174 | if method == 1: # linear 175 | if ov > Nt: 176 | weight = 1 - ov 177 | else: 178 | weight = 1 179 | elif method == 2: # gaussian 180 | weight = np.exp(-(ov * ov)/sigma) 181 | else: # original NMS 182 | if ov > Nt: 183 | weight = 0 184 | else: 185 | weight = 1 186 | 187 | boxes[pos, 4] = weight*boxes[pos, 4] 188 | 189 | # if box score falls below threshold, discard the box by 190 | # swapping with last box update N 191 | if boxes[pos, 4] < threshold: 192 | 
class Losses(nn.Module):
    """KL divergence between two sets of logits, taken over dimension 1.

    Computes ``KL(softmax(input2) || softmax(input1))`` element-wise,
    averages over all elements and multiplies by the batch size.
    """

    def __init__(self):
        super(Losses, self).__init__()

    def forward(self, input1, input2):
        """
        KL divergence loss
        :param input1: logits of the approximating distribution, (N, C, ...)
        :param input2: logits of the reference distribution, same shape
        :return: scalar loss; 0 when both inputs induce the same distribution
        """
        log_p1 = nn.functional.log_softmax(input1, dim=1)
        # Work in log space for the reference as well: softmax(...).log()
        # underflows to -inf for very small probabilities.
        log_p2 = nn.functional.log_softmax(input2, dim=1)
        p2 = log_p2.exp()
        # log_p1 is already a log-probability; taking its log again (as the
        # previous code did) is the log of a negative number, i.e. NaN.
        final_loss = (p2 * (log_p2 - log_p1)).mean()
        return final_loss * input1.size(0)
class GradReverse(Function):
    """Gradient reversal: identity in the forward pass, ``-lambd * grad`` backward.

    Implemented as a static (``ctx``-based) autograd Function and invoked
    through ``GradReverse.apply(x, lambd)``; the instance-style form with
    ``__init__`` and a non-static ``forward`` is rejected by current PyTorch.
    Use ``grad_reverse`` as the entry point.
    """

    @staticmethod
    def forward(ctx, x, lambd):
        # Remember the scale for the backward pass.
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # One gradient per forward input; ``lambd`` itself gets none.
        return grad_output * -ctx.lambd, None


def grad_reverse(x, lambd):
    """Return ``x`` unchanged while scaling its gradient by ``-lambd``."""
    return GradReverse.apply(x, lambd)
def gaussian(window_size, sigma):
    """Return a normalised 1-D Gaussian kernel as a tensor.

    The kernel has ``window_size`` taps, is centred on index
    ``window_size // 2`` and sums to 1.
    """
    center = window_size // 2
    denom = float(2 * sigma ** 2)
    weights = []
    for idx in range(window_size):
        weights.append(exp(-(idx - center) ** 2 / denom))
    kernel = torch.Tensor(weights)
    return kernel / kernel.sum()
def ssim(img1, img2, window_size=110, size_average=True):
    """Functional SSIM between two 4-D image batches.

    Builds a fresh Gaussian window for the channel count of ``img1``, moves
    it to the device/dtype of ``img1`` and delegates to ``_ssim``.

    NOTE(review): the default ``window_size=110`` looks like a typo for the
    customary 11 - confirm before relying on the default.
    """
    # Unpacking into four names requires a 4-D input, as the original did.
    _batch, channel, _height, _width = img1.size()
    kernel = create_window(window_size, channel)

    if img1.is_cuda:
        kernel = kernel.cuda(img1.get_device())
    kernel = kernel.type_as(img1)

    return _ssim(img1, img2, kernel, window_size, channel, size_average)
class VGG(FasterRCNN_AdEx):
    """Faster R-CNN detector with a VGG convolutional backbone.

    Args:
        features: convolutional trunk (as built by ``make_layers``); its last
            child module is dropped.
        cfg: configuration dict; reads ``gan_model_flag``, ``anchor_scales``,
            ``anchor_ratios``, ``anchor_stride`` and ``num_classes``.
    """

    def __init__(self, features, cfg):
        super(VGG, self).__init__(cfg['gan_model_flag'])

        # Drop the last pooling layer so that the feature stride is 2^4.
        # Rebuilding the Sequential (instead of popping from the private
        # ``_modules`` dict) leaves the caller's module untouched and keeps
        # the same '0', '1', ... parameter names.
        self.features = nn.Sequential(*list(features.children())[:-1])

        # rpn head
        num_anchors = len(cfg['anchor_scales']) * len(cfg['anchor_ratios'])
        self.rpn_head = NaiveRpnHead(512, num_classes=2, num_anchors=num_anchors)

        # rcnn head
        self.roipooling = RoIPool(7, 7, 1.0 / cfg['anchor_stride'])
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
        )
        self.fc_rcnn_cls = nn.Linear(4096, cfg['num_classes'])
        self.fc_rcnn_loc = nn.Linear(4096, cfg['num_classes'] * 4)

        self._initialize_weights()

    def feature_extractor(self, x):
        """Run the convolutional trunk on ``x``."""
        return self.features(x)

    def rpn(self, x):
        """Run the region-proposal head on a feature map."""
        return self.rpn_head(x)

    def rcnn(self, x, rois):
        """Classify and regress pooled RoI features.

        Args:
            x: feature map from ``feature_extractor``.
            rois: array/tensor with 5 columns per RoI (asserted below).

        Returns:
            ``(x_fea, rcnn_pred_cls, rcnn_pred_loc)``: the 4096-d RoI
            features, class scores and per-class box regression outputs.
        """
        assert(rois.shape[1] == 5)
        x = self.roipooling(x, rois)
        x = x.view(x.size(0), -1)
        x_fea = self.classifier(x)
        rcnn_pred_cls = self.fc_rcnn_cls(x_fea)
        rcnn_pred_loc = self.fc_rcnn_loc(x_fea)
        return x_fea, rcnn_pred_cls, rcnn_pred_loc

    def _initialize_weights(self):
        """Initialise conv layers He-style, BatchNorm to identity, Linear N(0, 0.01)."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
def _load_imagenet_weights(model, arch):
    """Copy every compatible tensor of the ImageNet checkpoint ``arch`` into ``model``.

    The checkpoint is a classification network: it carries keys the detector
    does not define and lacks the detector's RPN / R-CNN heads, so a plain
    strict ``load_state_dict`` on it fails. Only entries whose name and shape
    match are taken; everything else keeps its initialisation.
    """
    pretrained = model_zoo.load_url(model_urls[arch])
    merged = model.state_dict()
    for key, value in pretrained.items():
        if key in merged and merged[key].size() == value.size():
            merged[key] = value
    model.load_state_dict(merged)
    return model


def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, initialise matching layers from ImageNet weights
    """
    model = VGG(make_layers(cfg['B']), **kwargs)
    if pretrained:
        _load_imagenet_weights(model, 'vgg13')
    return model


def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization

    Args:
        pretrained (bool): If True, initialise matching layers from ImageNet weights
    """
    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    if pretrained:
        _load_imagenet_weights(model, 'vgg13_bn')
    return model


def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, initialise matching layers from ImageNet weights
    """
    model = VGG(make_layers(cfg['D']), **kwargs)
    if pretrained:
        _load_imagenet_weights(model, 'vgg16')
    return model


def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization

    Args:
        pretrained (bool): If True, initialise matching layers from ImageNet weights
    """
    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    if pretrained:
        _load_imagenet_weights(model, 'vgg16_bn')
    return model


def vgg19(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E")

    Args:
        pretrained (bool): If True, initialise matching layers from ImageNet weights
    """
    model = VGG(make_layers(cfg['E']), **kwargs)
    if pretrained:
        _load_imagenet_weights(model, 'vgg19')
    return model
// Forward RoIAlign kernel: each output element (n, c, ph, pw) is produced by
// bilinearly interpolating the input feature map at one sample point of RoI n.
//   nthreads      - total number of output elements
//   bottom_data   - input feature map, indexed as (batch, c, h, w)
//   spatial_scale - factor applied to RoI coordinates before sampling
//   bottom_rois   - 5 floats per RoI: batch index, x1, y1, x2, y2
//   top_data      - output, indexed as (n, c, ph, pw)
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
    // The loop macro strides by blockDim.x * gridDim.x, so a thread may
    // handle several output elements.
    CUDA_1D_KERNEL_LOOP(index, nthreads) {
        // (n, c, ph, pw) is an element in the aligned output
        int pw = index % aligned_width;
        int ph = (index / aligned_width) % aligned_height;
        int c = (index / aligned_width / aligned_height) % channels;
        int n = index / aligned_width / aligned_height / channels;

        float roi_batch_ind = bottom_rois[n * 5 + 0];
        float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
        float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
        float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
        float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

        // Clamp the (+1, inclusive) RoI extent at 0 for malformed RoIs whose
        // end lies before their start. Note this yields 0, not 1.
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        // Sample points are spread over the extent with both ends included.
        // NOTE(review): divides by zero when aligned_height or aligned_width
        // is 1 - confirm callers never request that.
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        // Continuous sample location for this output element.
        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        // Top-left neighbour, capped so that the +1 row/column stays inside
        // the map. NOTE(review): becomes -1 if height or width is 1.
        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        // Offset of the image this RoI belongs to.
        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (h < 0 || h >= height || w < 0 || w >= width) {
            // Sample point outside the feature map contributes zero.
            top_data[index] = 0.;
        } else {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                + bottom_data[upright] * (1. - h_ratio) * w_ratio
                + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                + bottom_data[downright] * h_ratio * w_ratio;
        }
    }
}
round(bottom_rois[2] * spatial_scale); */ 111 | /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */ 112 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */ 113 | 114 | // Force malformed ROIs to be 1x1 115 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 116 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 117 | float bin_size_h = roi_height / (aligned_height - 1.); 118 | float bin_size_w = roi_width / (aligned_width - 1.); 119 | 120 | float h = (float)(ph) * bin_size_h + roi_start_h; 121 | float w = (float)(pw) * bin_size_w + roi_start_w; 122 | 123 | int hstart = fminf(floor(h), height - 2); 124 | int wstart = fminf(floor(w), width - 2); 125 | 126 | int img_start = roi_batch_ind * channels * height * width; 127 | 128 | // bilinear interpolation 129 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 130 | float h_ratio = h - (float)(hstart); 131 | float w_ratio = w - (float)(wstart); 132 | int upleft = img_start + (c * height + hstart) * width + wstart; 133 | int upright = upleft + 1; 134 | int downleft = upleft + width; 135 | int downright = downleft + 1; 136 | 137 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio)); 138 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. 
// Host-side launcher for the ROIAlignBackward kernel.
//
// Computes the number of gradient elements in top_diff
// (num_rois * aligned_height * aligned_width * channels), launches the kernel
// on `stream` with enough 1024-thread blocks to cover that many elements, and
// then checks the launch status with cudaGetLastError().
//
// `batch_size` is accepted for interface compatibility but is not used here.
// The kernel accumulates into bottom_diff with atomicAdd and this function
// does not clear the buffer first, so the caller is responsible for its
// initial contents.
//
// Returns 1 on success (including the "nothing to do" case). If the launch
// reports an error, the message is written to stderr and exit(-1) is called.
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
                            const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
    const int kThreadsPerBlock = 1024;
    const int output_size = num_rois * aligned_height * aligned_width * channels;
    cudaError_t err;

    // No gradients to scatter (e.g. no ROIs). A grid with zero blocks is an
    // invalid launch configuration, which would otherwise trip the error check
    // below and terminate the whole process.
    if (output_size <= 0) {
        return 1;
    }

    // Ceil-divide so that a partially filled last block is still launched.
    const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;

    ROIAlignBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      output_size, top_diff, spatial_scale, height, width, channels,
      aligned_height, aligned_width, bottom_diff, bottom_rois);

    // Only launch-time errors are visible here; the kernel itself runs
    // asynchronously on `stream`.
    err = cudaGetLastError();
    if(cudaSuccess != err) {
        fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
class GAN_dis_AE_patch(nn.Module):
    """Patch discriminator: a single ``ResDis_cluster`` stage followed by a sigmoid.

    The stage is wrapped in an ``nn.Sequential`` stored as ``model_A_patch``.
    No custom weight initialisation is applied to it.
    """

    def __init__(self):
        super(GAN_dis_AE_patch, self).__init__()
        # Used for the source domain only.
        patch_stage = ResDis_cluster(n_in=128, n_out=256, kernel_size=3,
                                     stride=2, padding=1, w=64, h=64)
        self.model_A_patch = nn.Sequential(patch_stage)

    def forward(self, rois_features):
        """Return the sigmoid of the patch stage's output for ``rois_features``.

        NOTE(review): the original author annotated the result as size (4, 512);
        confirm against ``ResDis_cluster``.
        """
        return torch.sigmoid(self.model_A_patch(rois_features))
class GAN_decoder_AE(nn.Module):
    """Pair of decoders (``decode_A`` / ``decode_B``) with the same layout.

    Each decoder is built from separate layer instances:
    ``LinUnsRes_cluster`` -> ``n_gen_res_blk`` x ``INSResBlock`` ->
    (``n_gen_front_blk`` - 1) x ``LeakyReLUConvTranspose2d_2`` (stride 1,
    halving the channel count each time) -> 1x1 ``ConvTranspose2d`` -> ``Tanh``.

    Required ``params`` keys: ``input_dim_b``, ``ch``, ``n_gen_res_blk``,
    ``n_gen_front_blk``. Optional keys: ``res_dropout_ratio`` (default 0),
    ``neww`` (default 64), ``newh`` (default 64).
    """

    def __init__(self, params):
        super(GAN_decoder_AE, self).__init__()
        out_dim = params['input_dim_b']
        width = params['ch']
        n_res = params['n_gen_res_blk']
        n_front = params['n_gen_front_blk']
        dropout = params.get('res_dropout_ratio', 0)
        neww = params.get('neww', 64)
        newh = params.get('newh', 64)

        layers_b = []
        layers_a = []
        # B is always extended before A so that layers are created in the same
        # order for both decoders at every step.
        branches = (layers_b, layers_a)

        for branch in branches:
            branch.append(LinUnsRes_cluster(128, neww, newh))

        for _ in range(n_res):
            for branch in branches:
                branch.append(INSResBlock(width, width, dropout=dropout))

        for _ in range(n_front - 1):
            for branch in branches:
                branch.append(LeakyReLUConvTranspose2d_2(
                    width, width // 2,
                    kernel_size=3, stride=1, padding=1, output_padding=0))
            width //= 2

        # Project to the output channel count, then squash with tanh.
        for branch in branches:
            branch.append(nn.ConvTranspose2d(width, out_dim, kernel_size=1, stride=1, padding=0))
        for branch in branches:
            branch.append(nn.Tanh())

        self.decode_B = nn.Sequential(*layers_b)
        self.decode_B.apply(gaussian_weights_init)
        self.decode_A = nn.Sequential(*layers_a)
        self.decode_A.apply(gaussian_weights_init)

    def forward(self, x_aa, x_bb):
        """Decode ``x_aa`` with ``decode_A`` and ``x_bb`` with ``decode_B``.

        Returns the two outputs as a tuple, A first.
        NOTE(review): the original comment describes the inputs as 512 x 4096
        mapped to 512 x 64 x 64; confirm against ``LinUnsRes_cluster``.
        """
        return self.decode_A(x_aa), self.decode_B(x_bb)
def draw_bbox(img, bbox, color = (255,0,0)):
    """Draw a rectangle on ``img`` and return the result of ``cv2.rectangle``.

    The first four values of ``bbox`` are converted to int32 and used as the
    two corner points: values 0-1 for the first corner, values 2-3 for the
    second. ``color`` is passed straight through to OpenCV.
    """
    corners = np.array(bbox).astype(np.int32)
    first_corner = tuple(corners[:2])
    second_corner = tuple(corners[2:4])
    return cv2.rectangle(img, first_corner, second_corner, color)
def draw_mask(img, mask, thresh = 0.5):
    """Blend a thresholded mask into ``img`` in place and return ``img``.

    Pixels where ``mask > thresh`` get the overlay value 250, all others 0;
    the image is then replaced by ``0.5 * img + 0.5 * overlay``.

    Args:
        img: numpy image of shape (H, W) or (H, W, C). It is modified in
            place; integer images (e.g. uint8) are supported, with the blended
            values cast back to the image dtype.
        mask: numpy array whose first two dimensions match ``img``. A 2-D mask
            is broadcast over the channels of a 3-D image.
        thresh: values strictly above this count as foreground.

    Returns:
        The same ``img`` object, after blending.

    Raises:
        AssertionError: if the spatial sizes of ``img`` and ``mask`` differ.
    """
    # Only the spatial size has to agree; the mask may lack the channel axis.
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``
    # while callers still see the same exception type as before.
    if img.shape[:2] != mask.shape[:2]:
        raise AssertionError(
            'img.shape:{} vs mask.shape:{}'.format(img.shape, mask.shape))

    overlay = (mask > thresh).astype(np.float32) * 250
    if overlay.ndim < img.ndim:
        # Add a trailing axis so the overlay broadcasts across channels.
        overlay = overlay[..., np.newaxis]

    # Blend in floating point, then write back through a slice assignment:
    # this keeps the update in place and lets numpy cast to img's dtype,
    # which an in-place ``img *= 0.5`` refuses to do for integer images.
    img[...] = img * 0.5 + overlay * 0.5
    return img
def vis_detections(img, bboxes, gts, img_name, score_thresh):
    """Write one annotated copy of ``img`` per detection that passes the score cut.

    Each row of ``bboxes`` is read as: columns 0-3 the box (two corner points,
    cast to int32), column 4 the score, column 5 the class index into the
    module-level ``classes`` list. For every row whose score is at least
    ``score_thresh``, a copy of ``img`` is labelled with the class name, the
    row's maximum overlap with ``gts`` (from ``bbox_helper.bbox_iou_overlaps``)
    and the score, the box is drawn, and the result is saved as
    ``visualize/<name>_<row index>.jpg``. ``<name>`` is the part of
    ``img_name`` after the last ``/`` and before the first ``.``.

    The ``visualize`` directory is created if it does not exist. Returns None.
    """
    out_dir = 'visualize'
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    base_name = img_name.rsplit('/',1)[-1]
    img_name = base_name.split('.')[0]

    # Best overlap of every detection with any ground-truth box.
    best_iou = bbox_helper.bbox_iou_overlaps(bboxes, gts).max(axis=1)

    for row_ix, det in enumerate(bboxes):
        corners = det[:4].astype(np.int32)
        det_score = det[4]
        if not score >= score_thresh if False else det_score < score_thresh:
            continue

        label = classes[int(det[5])]
        canvas = img.copy()
        caption = 'label:%s, iou:%.3f, score:%.3f' % (label, best_iou[row_ix], det_score)
        cv2.putText(canvas, caption, (30, 30), 2, 0.8, (0, 0, 255))
        drawn = cv2.rectangle(canvas, tuple(corners[0:2]), tuple(corners[2:4]), (255, 0, 0))
        cv2.imwrite('%s/%s_%d.jpg' %(out_dir, img_name, row_ix), drawn)
range(B): 129 | #image = imgs[b] 130 | image = debugger.get_image(b) 131 | bxs = gt_boxes[b] 132 | #igs = ignores[b] 133 | kts = kpts[b] 134 | #mks = masks[b] 135 | n = bxs.shape[0] 136 | for ix in range(n): 137 | img_cpy = image.copy() 138 | draw_bbox(img_cpy, bxs[ix]) 139 | draw_keypoint(img_cpy, kts[ix]) 140 | #draw_mask(img_cpy, mks[ix]) 141 | filename = os.path.join(output_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix)) 142 | cv2.imwrite(filename, img_cpy) 143 | #for ix in range(igs.shape[0]): 144 | # img_cpy = imgs[b].copy() 145 | # draw_bbox(img_cpy, igs[ix], color=(0,0,255)) 146 | # filename = os.path.join(test_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix + n)) 147 | # cv2.imwrite(filename, img_cpy) 148 | 149 | def make_dot(var, params=None): 150 | """ Produces Graphviz representation of PyTorch autograd graph 151 | 152 | Blue nodes are the Variables that require grad, orange are Tensors 153 | saved for backward in torch.autograd.Function 154 | 155 | Args: 156 | var: output Variable 157 | params: dict of (name, Variable) to add names to node that 158 | require grad (TODO: make optional) 159 | """ 160 | if params is not None: 161 | # assert isinstance(params.values()[0], Variable) 162 | param_map = {id(v): k for k, v in params.items()} 163 | 164 | node_attr = dict(style='filled', 165 | shape='box', 166 | align='left', 167 | fontsize='12', 168 | ranksep='0.1', 169 | height='0.2') 170 | dot = Digraph(node_attr=node_attr, graph_attr=dict(size="20,20"), format='svg') 171 | seen = set() 172 | 173 | def size_to_str(size): 174 | return '('+(', ').join(['%d' % v for v in size])+')' 175 | 176 | def add_nodes(var): 177 | if var not in seen: 178 | if torch.is_tensor(var): 179 | dot.node(str(id(var)), size_to_str(var.size()), fillcolor='orange') 180 | elif hasattr(var, 'variable'): 181 | u = var.variable 182 | name = param_map[id(u)] if params is not None else '' 183 | node_name = '%s\n %s' % (name, size_to_str(u.size())) 184 | dot.node(str(id(var)), node_name, 
def visualize(var, filename):
    """Render the autograd graph of ``var`` to ``filename`` with Graphviz.

    Builds the graph with ``make_dot(var)`` and renders it. ``make_dot``
    creates the ``Digraph`` with ``format='svg'``, so the rendered output is
    an SVG file.

    Args:
        var: output tensor whose ``grad_fn`` graph should be drawn.
        filename: target file name handed to ``Digraph.render``.

    Returns:
        Whatever ``Digraph.render`` returns.
        NOTE(review): per the graphviz package this should be the path of the
        rendered file -- confirm for the installed version.
    """
    # The previous body called make_dot() without arguments, which always
    # raised TypeError and never used ``var`` or ``filename``.
    dot = make_dot(var)
    return dot.render(filename)