├── models
├── __init__.py
├── mask_rcnn
│ └── __init__.py
├── faster_rcnn
│ ├── __init__.py
│ ├── init.py
│ ├── vgg_adver_expansion_cluster.py
│ └── test_module.py
├── head.py
└── losses.py
├── utils
├── __init__.py
├── distributed_utils.py
├── log_helper.py
├── lr_helper.py
├── load_helper.py
├── coco_eval.py
├── anchor_helper.py
├── bbox_helper.py
├── cal_mAP.py
└── visualize_helper.py
├── datasets
├── __init__.py
├── pycocotools
│ ├── __init__.py
│ ├── Makefile
│ ├── setup.py
│ ├── common
│ │ ├── maskApi.h
│ │ └── gason.h
│ └── mask.py
├── target_dataset.py
├── example_loader.py
├── coco_loader.py
└── example_dataset.py
├── functions
├── __init__.py
├── predict_bbox.py
├── rpn_proposal.py
├── proposal_assign.py
└── anchor_target.py
├── extensions
├── _nms
│ ├── __init__.py
│ ├── _ext
│ │ ├── __init__.py
│ │ └── nms
│ │ │ └── __init__.py
│ ├── src
│ │ ├── nms_cuda.h
│ │ ├── nms.h
│ │ ├── cuda
│ │ │ ├── nms_kernel.h
│ │ │ └── nms_kernel.cu
│ │ ├── nms_cuda.c
│ │ └── nms.c
│ ├── build.sh
│ ├── build.py
│ └── pth_nms.py
├── _roi_align
│ ├── __init__.py
│ ├── _ext
│ │ ├── __init__.py
│ │ └── roi_align
│ │ │ └── __init__.py
│ ├── functions
│ │ ├── __init__.py
│ │ └── roi_align.py
│ ├── modules
│ │ ├── __init__.py
│ │ └── roi_align.py
│ ├── build.sh
│ ├── src
│ │ ├── roi_align_cuda.h
│ │ ├── roi_align_kernel.h
│ │ ├── roi_align_cuda.c
│ │ └── roi_align_kernel.cu
│ └── build.py
├── _bbox_helper
│ ├── __init__.py
│ ├── _ext
│ │ ├── __init__.py
│ │ └── bbox_helper
│ │ │ └── __init__.py
│ ├── src
│ │ ├── bbox_helper.h
│ │ ├── bbox_helper_cuda.h
│ │ ├── bbox_helper.c
│ │ ├── cuda
│ │ │ ├── iou_overlap_kernel.h
│ │ │ └── iou_overlap_kernel.cu
│ │ └── bbox_helper_cuda.c
│ ├── build.sh
│ ├── bbox_helper.py
│ └── build.py
├── _roi_pooling
│ ├── __init__.py
│ ├── _ext
│ │ ├── __init__.py
│ │ └── roi_pooling
│ │ │ └── __init__.py
│ ├── modules
│ │ ├── __init__.py
│ │ ├── roi_pool.py
│ │ └── roi_pool_py.py
│ ├── functions
│ │ ├── __init__.py
│ │ └── roi_pool.py
│ ├── src
│ │ ├── roi_pooling.h
│ │ ├── roi_pooling_cuda.h
│ │ ├── roi_pooling_kernel.h
│ │ ├── roi_pooling_cuda.c
│ │ └── roi_pooling.c
│ ├── build.sh
│ └── build.py
├── _focal_loss
│ ├── _ext
│ │ ├── __init__.py
│ │ └── focal_loss
│ │ │ └── __init__.py
│ ├── build.sh
│ ├── src
│ │ ├── cuda
│ │ │ ├── focal_loss_sigmoid_kernel.h
│ │ │ ├── focal_loss_softmax_kernel.h
│ │ │ ├── focal_loss_sigmoid_kernel.cu
│ │ │ └── focal_loss_softmax_kernel.cu
│ │ ├── focal_loss_cuda.h
│ │ └── focal_loss_cuda.c
│ ├── build.py
│ └── focal_loss.py
├── _cython_bbox
│ ├── build.sh
│ ├── setup.py
│ ├── cython_bbox.pyx
│ └── cython_nms.pyx
├── __init__.py
└── build_all.sh
├── img
└── pipeline4.png
├── examples
└── faster-rcnn
│ └── cityscapes
│ └── vgg
│ ├── eval_single.sh
│ ├── 2cluster.sh
│ ├── 4cluster.sh
│ ├── 8cluster.sh
│ ├── eval.sh
│ └── config_512.json
└── README.md
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/functions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_nms/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/datasets/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/img/pipeline4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xinge008/SCDA/HEAD/img/pipeline4.png
--------------------------------------------------------------------------------
/extensions/_cython_bbox/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python setup.py build_ext --inplace  # build the extension modules declared in setup.py in place
3 |
--------------------------------------------------------------------------------
/extensions/_nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper.h:
--------------------------------------------------------------------------------
1 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output);
2 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_iou_overlaps(THCudaTensor * bboxes1, THCudaTensor * bboxes2, THCudaTensor * output);
2 |
--------------------------------------------------------------------------------
/extensions/_nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);
--------------------------------------------------------------------------------
/extensions/_nms/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Build the NMS extension: compile the CUDA kernel with nvcc, then run build.py.
3 | # Abort on the first failing step so build.py never runs after a failed
4 | # `cd` or a failed kernel compilation.
5 | set -e
6 |
7 | cd src/cuda
8 | echo "Compiling nms kernels by nvcc..."
9 | nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
10 | cd ../../
11 | python build.py
12 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Build the bbox_helper extension: compile the IoU-overlap CUDA kernel with
3 | # nvcc, then run build.py.  Abort on the first failing step so build.py never
4 | # runs after a failed `cd` or a failed kernel compilation.
5 | set -e
6 |
7 | cd src/cuda
8 | echo "Compiling iou_overlap kernels by nvcc..."
9 | nvcc -c -o iou_overlap_kernel.cu.o iou_overlap_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
10 | cd ../../
11 | python build.py
12 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);
--------------------------------------------------------------------------------
/extensions/__init__.py:
--------------------------------------------------------------------------------
1 | from extensions._nms.pth_nms import pth_nms as nms
2 | # from extensions._psroi_pooling.psroi_pool import PSRoIPool
3 | from extensions._roi_pooling.modules.roi_pool import _RoIPooling as RoIPool
4 | # from extensions._deformable_convolution.deformable_conv import *
--------------------------------------------------------------------------------
/extensions/_roi_align/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Build the roi_align extension: compile the CUDA kernel with nvcc, then run
3 | # build.py.  Abort on the first failing step so build.py never runs after a
4 | # failed `cd` or a failed kernel compilation.
5 | set -e
6 |
7 | cd src
8 | echo "Compiling roi_align kernels by nvcc..."
9 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
10 |
11 | cd ../
12 | python build.py
13 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Build the roi_pooling extension: compile the CUDA kernel with nvcc, then run
3 | # build.py.  Abort on the first failing step so build.py never runs after a
4 | # failed `cd` or a failed kernel compilation.
5 | set -e
6 |
7 | cd src
8 | echo "Compiling roi_pooling kernels by nvcc..."
9 | # The object is deliberately named roi_pooling.cu.o: that is the path build.py links.
10 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
11 |
12 | cd ../
13 | python build.py
14 |
--------------------------------------------------------------------------------
/extensions/build_all.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Build every sub-directory of the current directory that ships a build.sh.
3 | # Stops at the first failing build and exits with that build's status.
4 | for dir in ./*
5 | do
6 |     if test -d "$dir" && test -f "$dir/build.sh"
7 |     then
8 |         echo "building $dir"
9 |         # Subshell: the working directory is restored automatically, even
10 |         # when the build fails part-way through.
11 |         ( cd "$dir" && bash build.sh )
12 |         status=$?
13 |         if [ "$status" -ne 0 ]; then
14 |             # A bare `exit` here would return the status of the `[` test
15 |             # above (always 0), hiding the failure from the caller.
16 |             exit "$status"
17 |         fi
18 |     fi
19 | done
20 |
--------------------------------------------------------------------------------
/datasets/pycocotools/Makefile:
--------------------------------------------------------------------------------
1 | # None of the targets below produce a file of the same name.
2 | .PHONY: all install clean
3 |
4 | all:
5 | 	# install pycocotools locally
6 | 	python setup.py build_ext --inplace
7 | 	rm -rf build
8 |
9 | install:
10 | 	# install pycocotools to the Python site-packages
11 | 	python setup.py build_ext install
12 | 	rm -rf build
13 |
14 | # -f keeps `make clean` from failing when nothing was built yet; the glob
15 | # removes the compiled module whatever Python version / platform tag it carries.
16 | clean:
17 | 	rm -f _mask.c _mask*.so
18 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/build.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Build the focal_loss extension: compile both CUDA kernels (sigmoid and
3 | # softmax) with nvcc, then run build.py.  Abort on the first failing step so
4 | # build.py never runs after a failed `cd` or a failed kernel compilation.
5 | set -e
6 |
7 | cd src/cuda
8 | echo "Compiling focal_loss kernels by nvcc..."
9 | nvcc -c -o focal_loss_sigmoid_kernel.cu.o focal_loss_sigmoid_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
10 | nvcc -c -o focal_loss_softmax_kernel.cu.o focal_loss_softmax_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
11 | cd ../../
12 | python build.py
13 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | /*
5 |  * CPU counterpart of the IoU-overlap routine.  NOT IMPLEMENTED: the function
6 |  * only fetches the raw data pointers of its inputs and never writes `output`.
7 |  *
8 |  * bboxes1, bboxes2: input box tensors (read only)
9 |  * output:           result tensor; left untouched by this placeholder
10 |  *
11 |  * Returns 0.
12 |  */
13 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output){
14 |
15 |     float * bboxes1_flat = THFloatTensor_data(bboxes1);
16 |     float * bboxes2_flat = THFloatTensor_data(bboxes2);
17 |
18 |     /* Silence unused-variable warnings until the loop below is written. */
19 |     (void) bboxes1_flat;
20 |     (void) bboxes2_flat;
21 |     (void) output;
22 |
23 |     // TO BE IMPLEMENTED
24 |
25 |     /* A non-void function must return a value: reading the result of one that
26 |      * falls off the end is undefined behaviour.
27 |      * NOTE(review): 0 is used for "nothing computed" -- confirm it matches
28 |      * the return convention of the other entry points. */
29 |     return 0;
30 | }
31 |
--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 |
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 |
--------------------------------------------------------------------------------
/extensions/_nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _NMS_KERNEL
2 | #define _NMS_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) /* integer division rounded up when there is a positive remainder */
9 | int const threadsPerBlock = sizeof(unsigned long long) * 8; /* 8 * sizeof(unsigned long long); presumably the bit width of one mask_dev word -- TODO confirm */
10 |
11 | void _nms(int boxes_num, float * boxes_dev,
12 | unsigned long long * mask_dev, float nms_overlap_thresh);
13 |
14 | #ifdef __cplusplus
15 | }
16 | #endif
17 |
18 | #endif
19 |
20 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 |
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/cuda/iou_overlap_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _IOU_OVERLAP_KERNEL
2 | #define _IOU_OVERLAP_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int IOUOverlap(
9 | const float* bboxes1_data, const float* bboxes2_data,
10 | const int size_bbox,
11 | const int num_bbox1,
12 | const int num_bbox2,
13 | float* top_data,
14 | cudaStream_t stream);
15 |
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 |
20 | #endif
21 |
22 |
--------------------------------------------------------------------------------
/extensions/_nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._nms import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/extensions/_roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_align import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/_ext/bbox_helper/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._bbox_helper import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/_ext/focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._focal_loss import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._roi_pooling import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/models/faster_rcnn/init.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 18-4-19
3 | # @Author : Xinge
4 | import torch.nn.init as init
5 | import numpy as np
6 |
7 |
8 | def gaussian_weights_init(m):
9 | classname = m.__class__.__name__
10 | if classname.find('Conv') != -1 and classname.find('Conv') == 0:
11 | # print m.__class__.__name__
12 | m.weight.data.normal_(0.0, 0.02)
13 |
14 | def xavier_weights_init(m):
15 | classname = m.__class__.__name__
16 | if classname.find('Conv') != -1:
17 | init.xavier_uniform(m.weight, gain=np.sqrt(2))
18 | init.constant(m.bias, 0.1)
19 |
20 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/bbox_helper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from extensions._bbox_helper._ext import bbox_helper
3 | import numpy as np
4 |
5 | def overlap(bboxes1, bboxes2):
6 | # bboxes1, bboxes2 has to be a tensor
7 | # bboxes1 [N, 4]: x1, y1, x2, y2
8 | # bboxes2 [M, 4]: x1, y1, x2, y2
9 | bboxes1 = torch.from_numpy(bboxes1[:, :4]).float().cuda().contiguous()
10 | bboxes2 = torch.from_numpy(bboxes2[:, :4]).float().cuda().contiguous()
11 |
12 | output = torch.cuda.FloatTensor(bboxes1.shape[0], bboxes2.shape[0])
13 | bbox_helper.gpu_iou_overlaps(bboxes1, bboxes2, output)
14 |
15 | return output.cpu().numpy()
16 |
17 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from ..functions.roi_pool import RoIPoolFunction
3 |
4 |
5 | class _RoIPooling(Module):
6 | def __init__(self, pooled_height, pooled_width, spatial_scale):
7 | super(_RoIPooling, self).__init__()
8 |
9 | self.pooled_width = int(pooled_width)
10 | self.pooled_height = int(pooled_height)
11 | self.spatial_scale = float(spatial_scale)
12 |
13 | def forward(self, features, rois):
14 | assert(rois.shape[1] == 5)
15 | return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
16 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_sigmoid_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _FOCAL_LOSS_SIGMOID_KERNEL
2 | #define _FOCAL_LOSS_SIGMOID_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int SigmoidFocalLossForwardLaucher(
9 | const int N, const float* logits,
10 | const int* targets, const float weight_pos,
11 | const float gamma, const float alpha,
12 | const int num_classes, float* losses, cudaStream_t stream);
13 |
14 | int SigmoidFocalLossBackwardLaucher(
15 | const int N, const float* logits,
16 | const int* targets, float* dX_data, const float weight_pos,
17 | const float gamma, const float alpha, const int num_classes,
18 | cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_softmax_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _FOCAL_LOSS_SOFTMAX_KERNEL
2 | #define _FOCAL_LOSS_SOFTMAX_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int SoftmaxFocalLossForwardLaucher(
9 | const int N, const float* logits,
10 | const int* targets, const float weight_pos,
11 | const float gamma, const float alpha,
12 | const int num_classes, float* losses,
13 | float* priors, cudaStream_t stream);
14 |
15 | int SoftmaxFocalLossBackwardLaucher(
16 | const int N, const float* logits,
17 | const int* targets, float* dX_data, const float weight_pos,
18 | const float gamma, const float alpha, const int num_classes,
19 | const float* priors, float* buff, cudaStream_t stream);
20 |
21 | #ifdef __cplusplus
22 | }
23 | #endif
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/datasets/pycocotools/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | from distutils.extension import Extension
4 | import numpy as np
5 |
6 | # To compile and install locally run "python setup.py build_ext --inplace"
7 | # To install library to Python site-packages run "python setup.py build_ext install"
8 |
9 | ext_modules = [
10 | Extension(
11 | '_mask',
12 | sources=['common/maskApi.c', '_mask.pyx'],
13 | include_dirs = [np.get_include(), 'common'],
14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
15 | )
16 | ]
17 |
18 | setup(name='pycocotools',
19 | packages=['pycocotools'],
20 | package_dir = {'pycocotools': '.'},
21 | version='2.0',
22 | ext_modules=
23 | cythonize(ext_modules)
24 | )
25 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _ROI_POOLING_KERNEL
2 | #define _ROI_POOLING_KERNEL
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int ROIPoolForwardLaucher(
9 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
10 | const int width, const int channels, const int pooled_height,
11 | const int pooled_width, const float* bottom_rois,
12 | float* top_data, int* argmax_data, cudaStream_t stream);
13 |
14 |
15 | int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
16 | const int height, const int width, const int channels, const int pooled_height,
17 | const int pooled_width, const float* bottom_rois,
18 | float* bottom_diff, const int* argmax_data, cudaStream_t stream);
19 |
20 | #ifdef __cplusplus
21 | }
22 | #endif
23 |
24 | #endif
25 |
26 |
--------------------------------------------------------------------------------
/extensions/_nms/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/nms.c']
7 | headers = ['src/nms.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/nms_cuda.c']
14 | headers += ['src/nms_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.nms',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/roi_pooling.c']
7 | headers = ['src/roi_pooling.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/roi_pooling_cuda.c']
14 | headers += ['src/roi_pooling_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/roi_pooling.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.roi_pooling',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = ['src/bbox_helper.c']
7 | headers = ['src/bbox_helper.h']
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/bbox_helper_cuda.c']
14 | headers += ['src/bbox_helper_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/iou_overlap_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 |
23 | ffi = create_extension(
24 | '_ext.bbox_helper',
25 | headers=headers,
26 | sources=sources,
27 | define_macros=defines,
28 | relative_to=__file__,
29 | with_cuda=with_cuda,
30 | extra_objects=extra_objects
31 | )
32 |
33 | if __name__ == '__main__':
34 | ffi.build()
35 |
--------------------------------------------------------------------------------
/extensions/_roi_align/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 | # sources = ['src/roi_align.c']
6 | # headers = ['src/roi_align.h']
7 | sources = []
8 | headers = []
9 | defines = []
10 | with_cuda = False
11 |
12 | if torch.cuda.is_available():
13 | print('Including CUDA code.')
14 | sources += ['src/roi_align_cuda.c']
15 | headers += ['src/roi_align_cuda.h']
16 | defines += [('WITH_CUDA', None)]
17 | with_cuda = True
18 |
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_align_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 |
24 | ffi = create_extension(
25 | '_ext.roi_align',
26 | headers=headers,
27 | sources=sources,
28 | define_macros=defines,
29 | relative_to=__file__,
30 | with_cuda=with_cuda,
31 | extra_objects=extra_objects
32 | )
33 |
34 | if __name__ == '__main__':
35 | ffi.build()
36 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/build.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from torch.utils.ffi import create_extension
4 |
5 |
6 | sources = []
7 | headers = []
8 | defines = []
9 | with_cuda = False
10 |
11 | if torch.cuda.is_available():
12 | print('Including CUDA code.')
13 | sources += ['src/focal_loss_cuda.c']
14 | headers += ['src/focal_loss_cuda.h']
15 | defines += [('WITH_CUDA', None)]
16 | with_cuda = True
17 |
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/focal_loss_sigmoid_kernel.cu.o', 'src/cuda/focal_loss_softmax_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | print('extra_objects {0}'.format(extra_objects))
23 |
24 | ffi = create_extension(
25 | '_ext.focal_loss',
26 | headers=headers,
27 | sources=sources,
28 | define_macros=defines,
29 | relative_to=__file__,
30 | with_cuda=with_cuda,
31 | extra_objects=extra_objects
32 | )
33 |
34 | if __name__ == '__main__':
35 | ffi.build()
36 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/eval_single.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Single-process (--dist=0) run of tools/faster_rcnn_train_val.py with the -e
3 | # flag, resuming from a checkpoint; all output is mirrored to train.log.
4 | #
5 | # pipefail: the run is piped through tee, so without it this script would
6 | # always report tee's exit status instead of the Python job's.
7 | set -o pipefail
8 | ROOT=../../../..
9 | export PYTHONPATH=$ROOT:$PYTHONPATH
10 | #--------------------------
11 | job_name=Test
12 | ckdir=4cluster
13 | mkdir -p "./${ckdir}/${job_name}"
14 | #--------------------------
15 |
16 | python -u $ROOT/tools/faster_rcnn_train_val.py \
17 |   --config=config_512.json \
18 |   --dist=0 \
19 |   --fix_num=3 \
20 |   --L1=1 \
21 |   -e \
22 |   --cluster_num=4 \
23 |   --threshold=128 \
24 |   --recon_size=256 \
25 |   --port=21603 \
26 |   --arch=vgg16_FasterRCNN \
27 |   --warmup_epochs=1 \
28 |   --lr=0.0000125 \
29 |   --step_epochs=16,22 \
30 |   --batch-size=1 \
31 |   --epochs=25 \
32 |   --dataset=cityscapes \
33 |   --resume=/path/to/checkpoint.pth \
34 |   --train_meta_file=/path/to/train.txt \
35 |   --target_meta_file=/path/to/foggy_train.txt \
36 |   --val_meta_file=/path/to/foggy_val.txt \
37 |   --datadir=/path/to/leftImg8bit/ \
38 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
39 |   --results_dir=${ckdir}/${job_name}/results_dir \
40 |   --save_dir=${ckdir}/${job_name} \
41 |   2>&1 | tee ${ckdir}/${job_name}/train.log
42 |
--------------------------------------------------------------------------------
/models/head.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
class NaiveRpnHead(nn.Module):
    '''
    Region-proposal head: one shared 3x3 conv + ReLU producing a 512-channel
    feature, followed by two sibling 1x1 convs (classification and box
    regression) that both read that feature.
    '''

    def __init__(self, inplanes, num_classes, num_anchors):
        '''
        Args:
            inplanes: number of channels of the incoming feature map
            num_classes: classification outputs emitted per anchor
            num_anchors: anchors predicted at every spatial position
        '''
        super(NaiveRpnHead, self).__init__()
        self.num_anchors = num_anchors
        self.num_classes = num_classes

        hidden = 512
        self.conv3x3 = nn.Conv2d(inplanes, hidden, kernel_size=3, stride=1, padding=1)
        self.relu3x3 = nn.ReLU(inplace=True)
        self.conv_cls = nn.Conv2d(hidden, num_anchors * num_classes, kernel_size=1, stride=1)
        self.conv_loc = nn.Conv2d(hidden, num_anchors * 4, kernel_size=1, stride=1)

    def forward(self, x):
        '''
        Args:
            x: [B, inplanes, h, w], input feature
        Return:
            pred_cls: [B, num_anchors*num_classes, h, w]
            pred_loc: [B, num_anchors*4, h, w]
        '''
        shared = self.relu3x3(self.conv3x3(x))
        return self.conv_cls(shared), self.conv_loc(shared)
33 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/2cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ROOT=../../../..
3 | export PYTHONPATH=$ROOT:$PYTHONPATH
4 | #--------------------------
5 | job_name=training_2cluster
6 | ckdir=2cluster
7 | mkdir -p ./${ckdir}/${job_name}
8 | #--------------------------
9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 |
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | --ntasks-per-node=${GPUS_PER_NODE} \
15 | --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 | --config=config_512.json \
18 | --dist=1 \
19 | --fix_num=0 \
20 | --L1=1 \
21 | --cluster_num=2 \
22 | --threshold=256 \
23 | --recon_size=512 \
24 | --port=21603 \
25 | --arch=vgg16_FasterRCNN \
26 | --warmup_epochs=1 \
27 | --lr=0.0000125 \
28 | --step_epochs=16,22 \
29 | --batch-size=1 \
30 | --epochs=25 \
31 | --dataset=cityscapes \
32 | --train_meta_file=/path/to/train.txt \
33 | --target_meta_file=/path/to/foggy_train.txt \
34 | --val_meta_file=/path/to/foggy_val.txt \
35 | --datadir=/path/to/leftImg8bit/ \
36 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 | --results_dir=${ckdir}/${job_name}/results_dir \
38 | --save_dir=${ckdir}/${job_name} \
39 | 2>&1 | tee ${ckdir}/${job_name}/train.log
40 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/4cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ROOT=../../../..
3 | export PYTHONPATH=$ROOT:$PYTHONPATH
4 | #--------------------------
5 | job_name=training_4cluster
6 | ckdir=4cluster
7 | mkdir -p ./${ckdir}/${job_name}
8 | #--------------------------
9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 |
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | --ntasks-per-node=${GPUS_PER_NODE} \
15 | --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 | --config=config_512.json \
18 | --dist=1 \
19 | --fix_num=0 \
20 | --L1=1 \
21 | --cluster_num=4 \
22 | --threshold=128 \
23 | --recon_size=256 \
24 | --port=21603 \
25 | --arch=vgg16_FasterRCNN \
26 | --warmup_epochs=1 \
27 | --lr=0.0000125 \
28 | --step_epochs=16,22 \
29 | --batch-size=1 \
30 | --epochs=25 \
31 | --dataset=cityscapes \
32 | --train_meta_file=/path/to/train.txt \
33 | --target_meta_file=/path/to/foggy_train.txt \
34 | --val_meta_file=/path/to/foggy_val.txt \
35 | --datadir=/path/to/leftImg8bit/ \
36 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 | --results_dir=${ckdir}/${job_name}/results_dir \
38 | --save_dir=${ckdir}/${job_name} \
39 | 2>&1 | tee ${ckdir}/${job_name}/train.log
40 |
41 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/8cluster.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ROOT=../../../..
3 | export PYTHONPATH=$ROOT:$PYTHONPATH
4 | #--------------------------
5 | job_name=training_8cluster
6 | ckdir=8cluster
7 | mkdir -p ./${ckdir}/${job_name}
8 | #--------------------------
9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 |
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | --ntasks-per-node=${GPUS_PER_NODE} \
15 | --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 | --config=config_512.json \
18 | --dist=1 \
19 | --fix_num=0 \
20 | --L1=1 \
21 | --cluster_num=8 \
22 | --threshold=64 \
23 | --recon_size=128 \
24 | --port=21603 \
25 | --arch=vgg16_FasterRCNN \
26 | --warmup_epochs=1 \
27 | --lr=0.0000125 \
28 | --step_epochs=16,22 \
29 | --batch-size=1 \
30 | --epochs=25 \
31 | --dataset=cityscapes \
32 | --train_meta_file=/path/to/train.txt \
33 | --target_meta_file=/path/to/foggy_train.txt \
34 | --val_meta_file=/path/to/foggy_val.txt \
35 | --datadir=/path/to/leftImg8bit/ \
36 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 | --results_dir=${ckdir}/${job_name}/results_dir \
38 | --save_dir=${ckdir}/${job_name} \
39 | 2>&1 | tee ${ckdir}/${job_name}/train.log
40 |
41 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/eval.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ROOT=../../../..
3 | export PYTHONPATH=$ROOT:$PYTHONPATH
4 | #--------------------------
5 | job_name=Test
6 | ckdir=4cluster
7 | mkdir -p ./${ckdir}/${job_name}
8 | #--------------------------
9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 |
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | --ntasks-per-node=${GPUS_PER_NODE} \
15 | --job-name=${job_name} \
16 | python -u $ROOT/tools/faster_rcnn_train_val.py \
17 | --config=config_512.json \
18 | --dist=1 \
19 | --fix_num=3 \
20 | --L1=1 \
21 | -e \
22 | --cluster_num=4 \
23 | --threshold=128 \
24 | --recon_size=256 \
25 | --port=21603 \
26 | --arch=vgg16_FasterRCNN \
27 | --warmup_epochs=1 \
28 | --lr=0.0000125 \
29 | --step_epochs=16,22 \
30 | --batch-size=1 \
31 | --epochs=25 \
32 | --dataset=cityscapes \
33 | --resume=/path/to/checkpoint.pth \
34 | --train_meta_file=/path/to/train.txt \
35 | --target_meta_file=/path/to/foggy_train.txt \
36 | --val_meta_file=/path/to/foggy_val.txt \
37 | --datadir=/path/to/leftImg8bit/ \
38 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
39 | --results_dir=${ckdir}/${job_name}/results_dir \
40 | --save_dir=${ckdir}/${job_name} \
41 | 2>&1 | tee ${ckdir}/${job_name}/train.log
42 |
--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
// Declarations of the RoI-align CUDA kernels and their host-side launchers.
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

// Give the symbols C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif

// Forward kernel: reads bottom_data and bottom_rois, writes top_data.
// NOTE(review): nthreads is presumably the total number of output elements
// (num_rois * channels * aligned_height * aligned_width) -- confirm in the .cu file.
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

// Host entry point for the forward pass; work is issued on `stream`.
// The "Laucher" spelling is part of the symbol name and must be kept as is.
// NOTE(review): the meaning of the int return value is defined in the .cu file -- confirm.
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

// Backward kernel: reads top_diff and bottom_rois, writes bottom_diff.
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

// Host entry point for the backward pass; unlike the forward launcher it also
// receives batch_size. Work is issued on `stream`.
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
34 |
35 |
--------------------------------------------------------------------------------
/extensions/_nms/pth_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from extensions._nms._ext import nms
3 | import numpy as np
4 |
def pth_nms(dets, thresh):
    """Run the compiled GPU NMS routine on `dets` and return the kept indices.

    Args:
        dets: detections tensor; it is moved to the GPU and made contiguous
            here. Presumably (N, 5) rows of [x1, y1, x2, y2, score] --
            TODO confirm against the `nms.gpu_nms` extension.
        thresh: overlap threshold, forwarded unchanged to `nms.gpu_nms`.

    Returns:
        A contiguous CPU LongTensor holding the first `num_out` entries that
        the extension wrote into the index buffer.
    """
    gpu_dets = dets.cuda().contiguous()

    # Output buffers filled by the extension: one index slot per input row
    # and a single-element count of how many slots are valid.
    kept_idx = torch.LongTensor(gpu_dets.size(0))
    kept_count = torch.LongTensor(1)
    nms.gpu_nms(kept_idx, kept_count, gpu_dets.float(), thresh)

    return kept_idx[:kept_count[0]].cpu().contiguous()
49 |
50 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper_cuda.c:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include "cuda/iou_overlap_kernel.h"
14 |
15 |
// THC state object defined elsewhere; every THCudaTensor_* call below needs it.
extern THCState *state;

// Hands the raw buffers of two box tensors to IOUOverlap, which fills `output`.
// Both inputs must have a second dimension of size 4.
// NOTE(review): output is presumably a num_bbox1 x num_bbox2 overlap matrix
// allocated by the caller -- confirm in cuda/iou_overlap_kernel.cu.
// Returns 1 after the kernel has been issued on the current stream.
int gpu_iou_overlaps(THCudaTensor * bboxes1, THCudaTensor * bboxes2, THCudaTensor * output){
    // Grab the raw float buffers of the input and output tensors
    float * bboxes1_data = THCudaTensor_data(state, bboxes1);
    float * bboxes2_data = THCudaTensor_data(state, bboxes2);
    float * output_data = THCudaTensor_data(state, output);

    // Number of boxes (dimension 0) and values per box (dimension 1)
    int num_bbox1 = THCudaTensor_size(state, bboxes1, 0);
    int num_bbox2 = THCudaTensor_size(state, bboxes2, 0);
    int size_bbox1 = THCudaTensor_size(state, bboxes1, 1);
    int size_bbox2 = THCudaTensor_size(state, bboxes2, 1);

    // The asserts catch a wrong box width in debug builds; the explicit check
    // below covers builds where assert is compiled out.
    assert(size_bbox1 == 4);
    assert(size_bbox2 == 4);
    if(size_bbox1 != 4 || size_bbox2 != 4){
        // NOTE(review): exit(1) terminates the whole process, so the
        // `return 0` after it is never reached.
        exit(1);
        return 0;
    }

    // Issue the kernel on the stream currently selected in the THC state.
    cudaStream_t stream = THCState_getCurrentStream(state);
    IOUOverlap(
        bboxes1_data,
        bboxes2_data,
        size_bbox1,
        num_bbox1,
        num_bbox2,
        output_data,
        stream);
    return 1;
}
48 |
--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/config_512.json:
--------------------------------------------------------------------------------
1 | {
2 | "shared": {
3 | "gan_model_flag": 2,
4 | "scales": [512],
5 | "max_size": 1024,
6 | "anchor_scales": [2, 4, 8, 16, 32],
7 | "anchor_ratios": [0.5, 1, 2],
8 | "anchor_stride": 16,
9 | "bbox_normalize_stats_precomputed": true,
10 | "bbox_normalize_stds": [0.1, 0.1, 0.2, 0.2],
11 | "bbox_normalize_means": [0, 0, 0, 0],
12 | "num_classes": 9,
13 | "class_names":[
14 | "__background__",
15 | "person", "rider", "car", "truck",
16 | "bus", "train", "motorcycle", "bicycle"],
17 | "roi_align": false
18 | },
19 | "train_anchor_target_cfg": {
20 | "rpn_batch_size": 256,
21 | "nms_iou_thresh": 0.7,
22 | "positive_iou_thresh": 0.7,
23 | "negative_iou_thresh": 0.3,
24 | "positive_percent": 0.5,
25 | "ignore_iou_thresh": 0.5
26 | },
27 | "train_rpn_proposal_cfg": {
28 | "nms_iou_thresh": 0.7,
29 | "pre_nms_top_n": 12000,
30 | "post_nms_top_n": 2000,
31 | "roi_min_size": 2
32 | },
33 | "train_proposal_target_cfg": {
34 | "batch_size": 512,
35 | "positive_iou_thresh": 0.5,
36 | "negative_iou_thresh_hi": 0.5,
37 | "negative_iou_thresh_lo": 0.0,
38 | "ignore_iou_thresh": 0.5,
39 | "positive_percent": 0.25,
40 | "append_gts": true
41 | },
42 | "test_rpn_proposal_cfg": {
43 | "nms_iou_thresh": 0.7,
44 | "pre_nms_top_n": 6000,
45 | "post_nms_top_n": 300,
46 | "roi_min_size": 2
47 | },
48 | "test_predict_bbox_cfg": {
49 | "nms_iou_thresh": 0.5,
50 | "score_thresh": 0.00,
51 | "top_n": 100
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/extensions/_cython_bbox/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 |
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 |
20 | from Cython.Build import cythonize
21 | from setuptools import Extension
22 | from setuptools import setup
23 |
24 | import numpy as np
25 |
# Directory holding the NumPy C headers needed by both Cython modules.
_NP_INCLUDE_DIRS = np.get_include()


# Extension modules: each one is built from '<name>.pyx' with identical
# compiler flags and include paths.
ext_modules = [
    Extension(
        name=module_name,
        sources=['{}.pyx'.format(module_name)],
        extra_compile_args=['-Wno-cpp'],
        include_dirs=[_NP_INCLUDE_DIRS],
    )
    for module_name in ('cython_bbox', 'cython_nms')
]

setup(
    name='Detectron',
    ext_modules=cythonize(ext_modules),
)
61 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/focal_loss_cuda.h:
--------------------------------------------------------------------------------
1 |
// Declarations of the four focal-loss CUDA entry points (sigmoid / softmax,
// forward / backward). All take N, float logits, int targets and the scalars
// weight_pos, gamma, alpha and num_classes.
// NOTE(review): N is presumably the number of samples and the int return a
// status code -- confirm in the implementation file.

// Sigmoid variant, forward: writes into `losses`.
int focal_loss_sigmoid_forward_cuda(
    int N,
    THCudaTensor * logits,
    THCudaIntTensor * targets,
    float weight_pos,
    float gamma,
    float alpha,
    int num_classes,
    THCudaTensor * losses);

// Sigmoid variant, backward: writes into `dX_data`.
int focal_loss_sigmoid_backward_cuda(
    int N,
    THCudaTensor * logits,
    THCudaIntTensor * targets,
    THCudaTensor * dX_data,
    float weight_pos,
    float gamma,
    float alpha,
    int num_classes);

// Softmax variant, forward: takes `priors` in addition to `losses`.
// NOTE(review): `priors` presumably receives the softmax probabilities that
// the backward pass reuses -- confirm.
int focal_loss_softmax_forward_cuda(
    int N,
    THCudaTensor * logits,
    THCudaIntTensor * targets,
    float weight_pos,
    float gamma,
    float alpha,
    int num_classes,
    THCudaTensor * losses,
    THCudaTensor * priors);

// Softmax variant, backward: takes `priors` and a scratch tensor `buff` in
// addition to the gradient tensor `dX_data`.
int focal_loss_softmax_backward_cuda(
    int N,
    THCudaTensor * logits,
    THCudaIntTensor * targets,
    THCudaTensor * dX_data,
    float weight_pos,
    float gamma,
    float alpha,
    int num_classes,
    THCudaTensor * priors,
    THCudaTensor * buff);
44 |
--------------------------------------------------------------------------------
/extensions/_roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.module import Module
2 | from torch.nn.functional import avg_pool2d, max_pool2d
3 | from ..functions.roi_align import RoIAlignFunction
4 |
5 |
class RoIAlign(Module):
    """Module wrapper that applies RoIAlignFunction with a fixed output size.

    Args:
        aligned_height: output height per RoI (coerced to int).
        aligned_width: output width per RoI (coerced to int).
        spatial_scale: scale forwarded to RoIAlignFunction (coerced to float).
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlign, self).__init__()

        # Coerce once so later calls always see plain Python scalars.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Return RoIAlignFunction(...) applied to (features, rois)."""
        align = RoIAlignFunction(
            self.aligned_height, self.aligned_width, self.spatial_scale)
        return align(features, rois)
17 |
class RoIAlignAvg(Module):
    """RoI align followed by 2x2 average pooling.

    The align step produces (aligned_height + 1) x (aligned_width + 1) maps;
    the stride-1, kernel-2 average pool then reduces them to
    aligned_height x aligned_width.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        # Coerce once so later calls always see plain Python scalars.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Align then average-pool; `rois` must have 5 columns."""
        assert(rois.shape[1] == 5)
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align(features, rois)
        return avg_pool2d(oversized, kernel_size=2, stride=1)
31 |
class RoIAlignMax(Module):
    """RoI align followed by 2x2 max pooling.

    The align step produces (aligned_height + 1) x (aligned_width + 1) maps;
    the stride-1, kernel-2 max pool then reduces them to
    aligned_height x aligned_width.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        # Coerce once so later calls always see plain Python scalars.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Align then max-pool; `rois` must have 5 columns."""
        assert(rois.shape[1] == 5)
        align = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align(features, rois)
        return max_pool2d(oversized, kernel_size=2, stride=1)
45 |
--------------------------------------------------------------------------------
/utils/distributed_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.distributed as dist
4 | from torch.nn import Module
5 | import torch.multiprocessing as mp
6 | import logging
7 | logger = logging.getLogger('global')
8 |
def average_gradients(model):
    """All-reduce the gradient of every trainable parameter that has one.

    Parameters with requires_grad=False or without a gradient are skipped.
    NOTE(review): only dist.all_reduce is called (its default reduction is a
    sum); no division by the world size happens here -- confirm that callers
    account for that, given the function name.
    """
    for param in model.parameters():
        if not param.requires_grad or param.grad is None:
            continue
        dist.all_reduce(param.grad.data)
14 |
def broadcast_params(model):
    """Broadcast every tensor of model.state_dict() from rank 0.

    Covers all state-dict entries (parameters and buffers alike), so each
    process ends up with the values held by rank 0.
    """
    state = model.state_dict()
    for tensor in state.values():
        dist.broadcast(tensor, 0)
20 |
def dist_init(port, backend = 'nccl'):
    """Initialise torch.distributed from the Slurm environment.

    Reads SLURM_PROCID, SLURM_NTASKS and SLURM_NODELIST, binds this process
    to GPU ``proc_id % device_count``, exports MASTER_PORT / MASTER_ADDR /
    WORLD_SIZE / RANK and creates the process group.

    Args:
        port: rendezvous port for MASTER_PORT; may be an int or a string.
        backend: 'nccl' (default) initialises purely from the exported
            environment variables; any other value uses 'gloo' with explicit
            rank and world size.

    Returns:
        (rank, world_size) as reported by torch.distributed.

    Raises:
        KeyError: if one of the SLURM_* variables is missing.
    """
    method = mp.get_start_method(allow_none=True)
    if method is None:
        # Nothing chosen yet: pick 'spawn' and report what is actually in
        # effect instead of the stale None.
        method = 'spawn'
        mp.set_start_method(method)
    logger.info('multiprocessing start method:{}'.format(method))
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id%num_gpus)

    if '[' in node_list:
        # Compressed list such as "prefix[a-b,c]": keep only the first host by
        # cutting at the first '-' or ',' after the bracket, then drop '['.
        beg = node_list.find('[')
        pos1 = node_list.find('-', beg)
        if pos1 < 0:
            pos1 = 1000
        pos2 = node_list.find(',', beg)
        if pos2 < 0:
            pos2 = 1000
        node_list = node_list[:min(pos1,pos2)].replace('[', '')
    # NOTE(review): drops the first 8 characters and turns '-' into '.';
    # presumably host names on the target cluster embed the IP address after
    # an 8-character prefix -- confirm before using on another cluster.
    addr = node_list[8:].replace('-', '.')
    # Environment values must be strings; an int port would raise TypeError.
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    if backend == 'nccl':
        dist.init_process_group(backend='nccl')
    else:
        dist.init_process_group(backend='gloo', rank=proc_id, world_size=ntasks)

    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size
54 |
55 |
--------------------------------------------------------------------------------
/utils/log_helper.py:
--------------------------------------------------------------------------------
1 | #encoding: utf8
2 | from __future__ import division
3 |
4 | import os
5 | import logging
6 | import math
7 |
# (name, level) pairs that init_log has already configured.
logs = set()

def init_log(name, level = logging.INFO):
    """Attach a stream handler to logger `name`, once per (name, level).

    Under Slurm (SLURM_PROCID set) a filter is added so that only the process
    with rank 0 emits records; the rank is also embedded in the line format.

    Args:
        name: logger name passed to logging.getLogger.
        level: level applied to both the logger and the new handler.
    """
    key = (name, level)
    if key in logs:
        return
    logs.add(key)

    logger = logging.getLogger(name)
    logger.setLevel(level)

    if 'SLURM_PROCID' in os.environ:
        rank = int(os.environ['SLURM_PROCID'])
        # Silence every process except rank 0.
        logger.addFilter(lambda record: rank == 0)
    else:
        rank = 0

    handler = logging.StreamHandler()
    handler.setLevel(level)
    handler.setFormatter(logging.Formatter(
        '%(asctime)s-rk{}-%(filename)s#%(lineno)d:%(message)s'.format(rank)))
    logger.addHandler(handler)
26 |
27 | # init_log('global')
28 |
def print_speed(i, i_time, n):
    """print_speed(index, index_time, total_iteration)

    Log progress and an ETA to the 'global' logger. The ETA is
    (n - i) * i_time seconds, broken down into days, hours and minutes.
    """
    logger = logging.getLogger('global')
    average_time = i_time
    remaining_time = (n - i) * average_time

    days = math.floor(remaining_time / 86400)
    hours = math.floor(remaining_time / 3600 - days * 24)
    minutes = math.floor(remaining_time / 60 - days * 1440 - hours * 60)

    percent = i / n * 100
    template = 'Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n'
    logger.info(template % (i, n, percent, average_time, days, hours, minutes))
38 |
39 |
def main():
    """Demo: configure one logger per standard level and emit one message at
    every severity through each, so the level filtering can be inspected."""
    levels = (logging.DEBUG, logging.INFO, logging.WARNING,
              logging.ERROR, logging.CRITICAL)
    for lvl in levels:
        # The numeric level doubles as the logger name.
        log_name = str(lvl)
        init_log(log_name, lvl)
        logger = logging.getLogger(log_name)
        print('****cur lvl:{}'.format(lvl))
        logger.debug('debug')
        logger.info('info')
        logger.warning('warning')
        logger.error('error')
        logger.critical('critiacal')


if __name__ == '__main__':
    main()
53 |
--------------------------------------------------------------------------------
/utils/lr_helper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.optim import Optimizer
3 |
4 | class _IterLRScheduler(object):
5 | def __init__(self, optimizer, last_iter=-1):
6 | if not isinstance(optimizer, Optimizer):
7 | raise TypeError('{} is not an Optimizer'.format(
8 | type(optimizer).__name__))
9 | self.optimizer = optimizer
10 | if last_iter == -1:
11 | for group in optimizer.param_groups:
12 | group.setdefault('initial_lr', group['lr'])
13 | else:
14 | for i, group in enumerate(optimizer.param_groups):
15 | if 'initial_lr' not in group:
16 | raise KeyError("param 'initial_lr' is not specified "
17 | "in param_groups[{}] when resuming an optimizer".format(i))
18 | self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
19 | self.step(last_iter + 1)
20 | self.last_iter = last_iter
21 |
22 | def get_lr(self):
23 | raise NotImplementedError
24 |
25 | def step(self, iter=None):
26 | if iter is None:
27 | iter = self.last_iter + 1
28 | self.last_iter = iter
29 | for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
30 | param_group['lr'] = lr
31 |
32 |
class IterExponentialLR(_IterLRScheduler):
    """Exponential per-iteration decay: every parameter group gets
    ``initial_lr * gamma ** last_iter``. When last_iter=-1, the current lr of
    each group is taken as its initial lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        gamma (float): Multiplicative factor of learning rate decay.
        last_iter (int): The index of last iter. Default: -1.
    """

    def __init__(self, optimizer, gamma, last_iter=-1):
        # gamma must exist before the base constructor runs, because that
        # constructor already calls step() and therefore get_lr().
        self.gamma = gamma
        super(IterExponentialLR, self).__init__(optimizer, last_iter)

    def get_lr(self):
        """Return the decayed learning rate of every parameter group."""
        decay = self.gamma ** self.last_iter
        return [base_lr * decay for base_lr in self.base_lrs]
50 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_pooling
4 | import pdb
5 |
class RoIPoolFunction(Function):
    """Autograd function wrapping the compiled roi_pooling extension.

    The instance stores the pooling configuration and, during forward, the
    feature size, the RoIs and the argmax buffer that backward needs.
    """

    def __init__(self, pooled_height, pooled_width, spatial_scale):
        self.pooled_width = pooled_width
        self.pooled_height = pooled_height
        self.spatial_scale = spatial_scale
        self.feature_size = None

    def forward(self, features, rois):
        """Pool every RoI to (pooled_height, pooled_width).

        Returns a new tensor of size
        (num_rois, num_channels, pooled_height, pooled_width).
        """
        self.feature_size = features.size()
        _, num_channels, _, _ = self.feature_size
        out_size = (rois.size(0), num_channels, self.pooled_height, self.pooled_width)

        output = features.new(*out_size).zero_()
        self.argmax = features.new(*out_size).zero_().int()
        self.rois = rois

        if features.is_cuda:
            assert(features.is_contiguous())
            assert(rois.is_contiguous())
            roi_pooling.roi_pooling_forward_cuda(
                self.pooled_height, self.pooled_width, self.spatial_scale,
                features, rois, output, self.argmax)
        else:
            # The CPU routine is handed the features permuted to (0, 2, 3, 1).
            _features = features.permute(0, 2, 3, 1)
            roi_pooling.roi_pooling_forward(
                self.pooled_height, self.pooled_width, self.spatial_scale,
                _features, rois, output)

        return output

    def backward(self, grad_output):
        """Return (gradient w.r.t. features, None); CUDA gradients only."""
        assert(self.feature_size is not None and grad_output.is_cuda)
        batch_size, num_channels, data_height, data_width = self.feature_size
        grad_input = grad_output.new(
            batch_size, num_channels, data_height, data_width).zero_()

        assert(grad_output.is_contiguous())
        assert(self.rois.is_contiguous())
        roi_pooling.roi_pooling_backward_cuda(
            self.pooled_height, self.pooled_width, self.spatial_scale,
            grad_output, self.rois, grad_input, self.argmax)

        # The RoIs receive no gradient.
        return grad_input, None
43 |
--------------------------------------------------------------------------------
/extensions/_roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from .._ext import roi_align
4 |
5 |
6 | # TODO use save_for_backward instead
class RoIAlignFunction(Function):
    """Autograd function wrapping the compiled roi_align CUDA extension.

    The instance stores the output size and scale and, during forward, the
    RoIs and feature size that backward needs.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)
        self.rois = None
        self.feature_size = None

    def forward(self, features, rois):
        """Align every RoI to (aligned_height, aligned_width).

        Only CUDA features are supported; CPU input raises
        NotImplementedError.
        """
        self.rois = rois
        self.feature_size = features.size()

        _, num_channels, _, _ = features.size()
        output = features.new(
            rois.size(0), num_channels,
            self.aligned_height, self.aligned_width).zero_()

        assert(features.is_contiguous())
        assert(rois.is_contiguous())
        if not features.is_cuda:
            raise NotImplementedError
        roi_align.roi_align_forward_cuda(
            self.aligned_height, self.aligned_width, self.spatial_scale,
            features, rois, output)

        return output

    def backward(self, grad_output):
        """Return (gradient w.r.t. features, None); CUDA gradients only."""
        assert(self.feature_size is not None and grad_output.is_cuda)

        batch_size, num_channels, data_height, data_width = self.feature_size

        # The gradient buffer is created from the RoI tensor's type.
        grad_input = self.rois.new(
            batch_size, num_channels, data_height, data_width).zero_()
        assert(grad_output.is_contiguous())
        assert(self.rois.is_contiguous())
        roi_align.roi_align_backward_cuda(
            self.aligned_height, self.aligned_width, self.spatial_scale,
            grad_output, self.rois, grad_input)

        # The RoIs receive no gradient.
        return grad_input, None
52 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/roi_pool_py.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | import numpy as np
5 |
6 |
class RoIPool(nn.Module):
    """Pure-PyTorch RoI max pooling (slow reference implementation).

    Args:
        pooled_height: output height per RoI (coerced to int).
        pooled_width: output width per RoI (coerced to int).
        spatial_scale: factor mapping RoI coordinates onto the feature map
            (coerced to float).
    """

    def __init__(self, pooled_height, pooled_width, spatial_scale):
        super(RoIPool, self).__init__()
        self.pooled_width = int(pooled_width)
        self.pooled_height = int(pooled_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Max-pool each RoI into a pooled_height x pooled_width grid.

        Args:
            features: tensor of size (batch, channels, height, width).
            rois: one row per RoI, read as [batch_index, x1, y1, x2, y2];
                the coordinates are multiplied by spatial_scale and rounded.

        Returns:
            Tensor of size (num_rois, channels, pooled_height, pooled_width),
            allocated with the dtype and device of `features`. Bins that fall
            completely outside the feature map stay 0.
        """
        batch_size, num_channels, data_height, data_width = features.size()
        num_rois = rois.size()[0]
        # Allocate next to the input instead of forcing a CUDA tensor, so the
        # module also works for CPU features.
        outputs = features.new_zeros(
            num_rois, num_channels, self.pooled_height, self.pooled_width)

        for roi_ind, roi in enumerate(rois):
            # roi[0] is a single-element tensor; int() converts it without
            # indexing into a 0-dim tensor.
            batch_ind = int(roi[0])
            roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round(
                roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int)
            roi_width = max(roi_end_w - roi_start_w + 1, 1)
            roi_height = max(roi_end_h - roi_start_h + 1, 1)
            bin_size_w = float(roi_width) / float(self.pooled_width)
            bin_size_h = float(roi_height) / float(self.pooled_height)

            for ph in range(self.pooled_height):
                hstart = int(np.floor(ph * bin_size_h))
                hend = int(np.ceil((ph + 1) * bin_size_h))
                # Shift into feature-map coordinates and clip to the map.
                hstart = min(data_height, max(0, hstart + roi_start_h))
                hend = min(data_height, max(0, hend + roi_start_h))
                for pw in range(self.pooled_width):
                    wstart = int(np.floor(pw * bin_size_w))
                    wend = int(np.ceil((pw + 1) * bin_size_w))
                    wstart = min(data_width, max(0, wstart + roi_start_w))
                    wend = min(data_width, max(0, wend + roi_start_w))

                    is_empty = (hend <= hstart) or (wend <= wstart)
                    if is_empty:
                        outputs[roi_ind, :, ph, pw] = 0
                    else:
                        data = features[batch_ind]
                        region = data[:, hstart:hend, wstart:wend]
                        # Flatten the spatial window so one reduction yields
                        # the per-channel maximum; chaining max over dims 1
                        # and 2 fails because the first max already removes a
                        # dimension.
                        outputs[roi_ind, :, ph, pw] = region.contiguous().view(
                            num_channels, -1).max(1)[0]

        return outputs
48 |
49 |
--------------------------------------------------------------------------------
/datasets/pycocotools/common/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 |
/* Short scalar aliases used by every declaration in this header. */
typedef unsigned int uint;
typedef unsigned long siz;
typedef unsigned char byte;
/* Bounding boxes are passed as a bare pointer to doubles.
   NOTE(review): the per-box layout is not stated in this header -- confirm in maskApi.c. */
typedef double* BB;
/* Run-length-encoded mask: three size fields plus a pointer to uint counts.
   NOTE(review): presumably h/w are the mask height/width and m the number of
   entries in cnts -- confirm in maskApi.c. */
typedef struct { siz h, w, m; uint *cnts; } RLE;
14 |
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 |
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 |
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 |
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 |
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 |
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 |
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 |
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 |
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 |
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 |
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 |
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 |
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 |
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 |
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 |
--------------------------------------------------------------------------------
/utils/load_helper.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import logging
3 | import pprint
4 | logger = logging.getLogger('global')
5 |
def check_keys(model, pretrained_state_dict):
    """Report how a checkpoint's keys line up with the model's own keys.

    Pretty-prints both key sets to stdout, logs how many keys are missing
    from the checkpoint, unused by the model and shared by both, and asserts
    that at least one key is shared.

    :param model: object exposing ``state_dict()``
    :param pretrained_state_dict: mapping of parameter name -> value
    :return: True (AssertionError is raised when no key overlaps)
    """
    in_ckpt = set(pretrained_state_dict.keys())
    in_model = set(model.state_dict().keys())

    shared = in_model.intersection(in_ckpt)
    only_in_ckpt = in_ckpt.difference(in_model)
    only_in_model = in_model.difference(in_ckpt)

    for key_set in (in_model, in_ckpt):
        pprint.pprint(key_set)

    logger.info('missing keys:{}'.format(len(only_in_model)))
    logger.info('unused checkpoint keys:{}'.format(len(only_in_ckpt)))
    logger.info('used keys:{}'.format(len(shared)))
    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True
19 |
20 |
def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` whose keys have ``prefix`` stripped.

    Older checkpoints store every parameter name with a shared leading
    prefix such as 'module.'. Keys that do not start with ``prefix`` are
    kept unchanged.

    :param state_dict: mapping of parameter name -> value
    :param prefix: leading string to strip from each key
    :return: new dict with renamed keys and the original values
    """
    logger.info('remove prefix \'{}\''.format(prefix))

    def _strip(name):
        # Only the first occurrence is removed, and only when it is leading.
        if not name.startswith(prefix):
            return name
        return name.split(prefix, 1)[-1]

    renamed = {}
    for key, value in state_dict.items():
        renamed[_strip(key)] = value
    return renamed
26 |
27 |
def load_pretrain(model, pretrained_path):
    """Load pretrained weights from ``pretrained_path`` into ``model``.

    Paths ending in 'tar' are expected to hold a dict with a 'state_dict'
    entry; any other file is treated as the state dict itself. A leading
    'module.' prefix is stripped from every key, and loading is non-strict,
    so keys that do not match are skipped.

    :param model: module exposing ``state_dict()`` / ``load_state_dict()``
    :param pretrained_path: path of the checkpoint file
    :return: the same ``model`` with the matching weights loaded
    """
    logger.info('load pretrained model from {}'.format(pretrained_path))
    if torch.cuda.is_available():
        device = torch.cuda.current_device()

        def map_location(storage, loc):
            # Place every storage on the currently selected GPU.
            return storage.cuda(device)
    else:
        # Without CUDA, torch.cuda.current_device() raises; keep the
        # storages on the CPU so the checkpoint can still be read.
        def map_location(storage, loc):
            return storage

    pretrained_dict = torch.load(pretrained_path, map_location=map_location)
    if pretrained_path.endswith('tar'):
        pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.')
    else:
        pretrained_dict = remove_prefix(pretrained_dict, 'module.')
    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model
39 |
40 |
def restore_from(model, optimizer, ckpt_path):
    """Restore model weights and training metadata from a checkpoint.

    The checkpoint must contain the keys 'epoch', 'best_recall', 'arch' and
    'state_dict'. A leading 'module.' prefix is stripped from the weight
    names and loading is non-strict.

    :param model: module exposing ``state_dict()`` / ``load_state_dict()``
    :param optimizer: accepted for interface compatibility; its state is not
        restored (see the note in the body)
    :param ckpt_path: path of the checkpoint file
    :return: tuple ``(model, None, epoch, best_recall, arch)``
    """
    logger.info('restore from {}'.format(ckpt_path))
    if torch.cuda.is_available():
        device = torch.cuda.current_device()

        def map_location(storage, loc):
            # Place every storage on the currently selected GPU.
            return storage.cuda(device)
    else:
        # Without CUDA, torch.cuda.current_device() raises; keep the
        # storages on the CPU so the checkpoint can still be read.
        def map_location(storage, loc):
            return storage

    ckpt = torch.load(ckpt_path, map_location=map_location)
    epoch = ckpt['epoch']
    best_recall = ckpt['best_recall']
    arch = ckpt['arch']
    ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.')
    check_keys(model, ckpt_model_dict)
    model.load_state_dict(ckpt_model_dict, strict=False)

    # NOTE(review): optimizer state is deliberately not restored and None is
    # returned in its place, so the caller's optimizer is dropped. Confirm
    # with callers before re-enabling optimizer.load_state_dict(ckpt['optimizer']).
    optimizer = None
    return model, optimizer, epoch, best_recall, arch
55 |
--------------------------------------------------------------------------------
/extensions/_nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 | #include "cuda/nms_kernel.h"
13 |
14 |
15 | extern THCState *state;
16 |
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 | // boxes has to be sorted
19 | THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous");
20 | THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 | // Number of ROIs
22 | int boxes_num = THCudaTensor_size(state, boxes, 0);
23 | int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 |
25 | float* boxes_flat = THCudaTensor_data(state, boxes);
26 |
27 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 | THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 | unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 |
31 | _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 |
33 | THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 | THLongTensor_copyCuda(state, mask_cpu, mask);
35 | THCudaLongTensor_free(state, mask);
36 |
37 | unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 |
39 | THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 | unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 | THLongTensor_fill(remv_cpu, 0);
42 |
43 | long * keep_flat = THLongTensor_data(keep);
44 | long num_to_keep = 0;
45 |
46 | int i, j;
47 | for (i = 0; i < boxes_num; i++) {
48 | int nblock = i / threadsPerBlock;
49 | int inblock = i % threadsPerBlock;
50 |
51 | if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 | keep_flat[num_to_keep++] = i;
53 | unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 | for (j = nblock; j < col_blocks; j++) {
55 | remv_cpu_flat[j] |= p[j];
56 | }
57 | }
58 | }
59 |
60 | long * num_out_flat = THLongTensor_data(num_out);
61 | * num_out_flat = num_to_keep;
62 |
63 | THLongTensor_free(mask_cpu);
64 | THLongTensor_free(remv_cpu);
65 |
66 | return 1;
67 | }
68 |
--------------------------------------------------------------------------------
/datasets/target_dataset.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 18-5-3 4:40
3 | # @Author : Xinge
4 |
5 | from __future__ import division
6 | import torch
7 | from torch.utils.data import DataLoader, Dataset
8 | import torchvision.transforms as transforms
9 | import numpy as np
10 | from io import StringIO
11 | from PIL import Image
12 | import pickle as pk
13 | import os
14 |
def pil_loader(img_str):
    """Decode an in-memory encoded image into an RGB PIL image.

    :param img_str: raw bytes of an encoded image file
    :return: PIL image converted to mode 'RGB'
    """
    # Encoded image data is binary: io.StringIO rejects bytes on write(),
    # so a bytes buffer is required for PIL to decode it.
    from io import BytesIO

    with Image.open(BytesIO(img_str)) as img:
        return img.convert('RGB')
22 |
class TargetDataset(Dataset):
    """Unlabelled image dataset that yields fixed-size RGB image tensors.

    ``list_file`` holds one image path per line, relative to ``root_dir``.
    Each sample is opened, converted to RGB, resized to ``(new_w, new_h)``,
    turned into a tensor and optionally normalized.
    """

    def __init__(self, root_dir, list_file, normalize_fn=None, memcached=False, new_w=1024, new_h=512):
        """
        :param root_dir: directory the listed paths are relative to
        :param list_file: text file with one relative image path per line
        :param normalize_fn: optional callable applied to the image tensor
        :param memcached: unused; kept for interface compatibility
        :param new_w: output image width
        :param new_h: output image height
        """
        self.root_dir = root_dir
        self.normalize_fn = normalize_fn
        self.new_w = new_w
        self.new_h = new_h
        with open(list_file) as f:
            # Blank lines would otherwise become samples pointing at root_dir.
            self.metas = [line.strip() for line in f if line.strip()]

        self.num = len(self.metas)

    def __len__(self):
        return self.num

    def __getitem__(self, idx):
        """Return the image at ``idx`` as a (normalized) tensor."""
        filename = os.path.join(self.root_dir, self.metas[idx])
        # Convert every non-RGB mode (L, RGBA, P, CMYK, ...), not only 'L',
        # so the tensor always has three channels. convert() returns a new,
        # fully loaded image, so the file can be closed right away.
        with Image.open(filename) as raw:
            img = raw.convert('RGB')
        img = self.transform(img, self.new_w, self.new_h)
        img = transforms.ToTensor()(img)
        if self.normalize_fn is not None:
            img = self.normalize_fn(img)
        return img

    def transform(self, img, new_w, new_h):
        """Resize ``img`` to ``(new_w, new_h)``.

        :param img: image object exposing ``resize``
        :param new_w: target width
        :param new_h: target height
        :return: the resized image
        """
        return img.resize((new_w, new_h))
--------------------------------------------------------------------------------
/utils/coco_eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | from datasets.pycocotools.coco import COCO
4 | from datasets.pycocotools.cocoeval import COCOeval
5 | from datasets.coco_dataset import COCODataset
6 | import sys,os
7 | import re
8 | import logging
9 |
10 | logger = logging.getLogger('global')
def eval_coco_ap_from_results_txt(result_dir, test_type, anno_file):
    """Run COCO evaluation on detection results stored as text files.

    Every file in ``result_dir`` whose name contains 'results.txt.rank' is
    parsed. Each non-empty line is whitespace separated: an image path whose
    second-to-last '/' or '.' separated token is the integer image id,
    followed by x1 y1 x2 y2. For ``test_type == 'proposal'`` the last field
    is the score; otherwise the last two fields are the score and a class
    index, which is mapped to the sorted category ids of the annotations
    (1-based).

    :param result_dir: directory holding the per-rank result files
    :param test_type: one of 'segm', 'bbox', 'keypoints', 'person_bbox',
        'person_proposal', 'proposal'
    :param anno_file: COCO annotation file used as ground truth
    :return: None; the summary is printed by ``COCOeval.summarize()``
    """
    logger.info("start eval coco ...")

    assert(test_type in ['segm', 'bbox', 'keypoints', 'person_bbox', 'person_proposal', 'proposal'])

    coco_gt = COCO(anno_file)
    category_ids = {anno['category_id'] for anno in coco_gt.anns.values()}
    class_to_category = {i+1: c for i, c in enumerate(sorted(category_ids))}

    all_res = []
    for f in os.listdir(result_dir):
        if 'results.txt.rank' not in f:
            continue
        # Close each result file deterministically instead of leaking it.
        with open(os.path.join(result_dir, f), 'r') as result_file:
            for aline in result_file:
                aline = aline.rstrip().split()
                if not aline:
                    # Blank lines carry no detection; indexing them would fail.
                    continue
                res = {}
                res["image_id"] = int(re.split('[/.]', aline[0])[-2])
                x1 = float(aline[1])
                y1 = float(aline[2])
                x2 = float(aline[3])
                y2 = float(aline[4])
                if test_type == 'proposal':
                    # NOTE(review): proposals are stored as x1,y1,x2,y2 while
                    # the other branch converts to x,y,w,h -- confirm this
                    # difference is intended.
                    res["bbox"] = [x1, y1, x2, y2]
                    res["score"]= float(aline[-1])
                    res["category_id"] = 1
                else:
                    res["bbox"] = [x1, y1, x2 - x1, y2 - y1]
                    res["score"]= float(aline[-2])
                    res["category_id"] = class_to_category[int(aline[-1])]
                all_res.append(res)

    logger.info("all res line: {}".format(len(all_res)))

    iou_type = {'keypoints':'keypoints', 'person_bbox':'bbox',
                'bbox':'bbox', 'segm':'segm',
                'proposal': 'bbox', 'person_proposal':'bbox'}[test_type]

    logger.info('loading annotations from %s\n' % anno_file)
    coco_dt = coco_gt.loadRes(all_res)
    coco_eval = COCOeval(coco_gt, coco_dt, iou_type)

    if test_type.find('proposal') >= 0:
        # Proposal-style runs are evaluated class-agnostically.
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = [1,100,1000]
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
62 |
63 |
--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "roi_align_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 |
16 | // Number of ROIs
17 | int num_rois = THCudaTensor_size(state, rois, 0);
18 | int size_rois = THCudaTensor_size(state, rois, 1);
19 | if (size_rois != 5)
20 | {
21 | return 0;
22 | }
23 |
24 | // data height
25 | int data_height = THCudaTensor_size(state, features, 2);
26 | // data width
27 | int data_width = THCudaTensor_size(state, features, 3);
28 | // Number of channels
29 | int num_channels = THCudaTensor_size(state, features, 1);
30 |
31 | cudaStream_t stream = THCState_getCurrentStream(state);
32 |
33 | ROIAlignForwardLaucher(
34 | data_flat, spatial_scale, num_rois, data_height,
35 | data_width, num_channels, aligned_height,
36 | aligned_width, rois_flat,
37 | output_flat, stream);
38 |
39 | return 1;
40 | }
41 |
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 | // Grab the input tensor
46 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 | float * rois_flat = THCudaTensor_data(state, rois);
48 |
49 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 |
51 | // Number of ROIs
52 | int num_rois = THCudaTensor_size(state, rois, 0);
53 | int size_rois = THCudaTensor_size(state, rois, 1);
54 | if (size_rois != 5)
55 | {
56 | return 0;
57 | }
58 |
59 | // batch size
60 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 | // data height
62 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 | // data width
64 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 | // Number of channels
66 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 |
68 | cudaStream_t stream = THCState_getCurrentStream(state);
69 | ROIAlignBackwardLaucher(
70 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 | data_width, num_channels, aligned_height,
72 | aligned_width, rois_flat,
73 | bottom_grad_flat, stream);
74 |
75 | return 1;
76 | }
77 |
--------------------------------------------------------------------------------
/datasets/example_loader.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 |
3 | import torch
4 | import torch.nn.functional as F
5 | import numpy as np
6 | import logging
7 |
class ExampleDataLoader(torch.utils.data.DataLoader):
    """DataLoader whose collate step zero-pads every sample to a common size."""

    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
                 num_workers=0, pin_memory=False, drop_last=False):
        super(ExampleDataLoader, self).__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
                                                num_workers, self._collate_fn, pin_memory, drop_last)

    def _collate_fn(self, batch):
        """Merge a list of samples into padded batch tensors.

        Each sample is indexed as (image, unpadded size, gt boxes, ignore
        boxes, filename). Images are zero-padded on the right/bottom to the
        largest height and width in the batch and concatenated along dim 0;
        box arrays are zero-padded with extra rows up to the largest row
        count in the batch and stacked.

        :return: (images, unpadded sizes, gt boxes, ignore boxes, filenames)
        """
        columns = list(zip(*batch))
        images = columns[0]
        unpad_image_sizes = columns[1]
        ground_truth_bboxes = columns[2]
        ignores = columns[3]
        filenames = columns[4]

        target_h = max([im.shape[-2] for im in images])
        target_w = max([im.shape[-1] for im in images])
        target_gt_rows = max([gt.shape[0] for gt in ground_truth_bboxes])
        target_ig_rows = max([ig.shape[0] for ig in ignores])

        def _pad_rows(boxes, row_count):
            # Copy the boxes into the top rows of a zero-filled array.
            arr = boxes.numpy()
            padded = np.zeros([row_count, arr.shape[-1]])
            padded[:arr.shape[0]] = arr
            return padded

        image_list, gt_list, ig_list = [], [], []
        for img, gt, ig in zip(images, ground_truth_bboxes, ignores):
            # F.pad order is (left, right, top, bottom) for the last two dims
            extra = (0, target_w - img.shape[-1], 0, target_h - img.shape[-2])
            image_list.append(F.pad(img, extra, 'constant', 0).data.cpu())
            gt_list.append(_pad_rows(gt, target_gt_rows))
            ig_list.append(_pad_rows(ig, target_ig_rows))

        batch_images = torch.cat(image_list, dim=0)
        batch_gt = torch.from_numpy(np.stack(gt_list, axis=0))
        batch_ig = torch.from_numpy(np.stack(ig_list, axis=0))
        batch_sizes = torch.stack(unpad_image_sizes, dim=0)
        return batch_images, batch_sizes, batch_gt, batch_ig, filenames
57 |
--------------------------------------------------------------------------------
/extensions/_cython_bbox/cython_bbox.pyx:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | #
16 | # Based on:
17 | # --------------------------------------------------------
18 | # Fast R-CNN
19 | # Copyright (c) 2015 Microsoft
20 | # Licensed under The MIT License [see LICENSE for details]
21 | # Written by Sergey Karayev
22 | # --------------------------------------------------------
23 |
24 | cimport cython
25 | import numpy as np
26 | cimport numpy as np
27 |
28 | DTYPE = np.float32
29 | ctypedef np.float32_t DTYPE_t
30 |
@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise intersection-over-union of two sets of boxes.

    Columns 0..3 of each row are read as x1, y1, x2, y2. Widths and heights
    are plain differences (x2 - x1, y2 - y1) with no +1 offset. Pairs whose
    intersection width or height is not positive keep an overlap of 0.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Zero-initialised so non-overlapping pairs need no explicit write.
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    with nogil:
        for k in range(K):
            # Area of the query box is reused for every box in the inner loop.
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0]) *
                (query_boxes[k, 3] - query_boxes[k, 1])
            )
            for n in range(N):
                # Intersection width; height is only computed when width > 0.
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0])
                )
                if iw > 0:
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1])
                    )
                    if ih > 0:
                        # Union area = area(box) + area(query) - intersection.
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0]) *
                            (boxes[n, 3] - boxes[n, 1]) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps
74 |
--------------------------------------------------------------------------------
/extensions/_nms/src/nms.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
5 | // boxes has to be sorted
6 | THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
7 | THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous");
8 | THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
9 | THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous");
10 | // Number of ROIs
11 | long boxes_num = THFloatTensor_size(boxes, 0);
12 | long boxes_dim = THFloatTensor_size(boxes, 1);
13 |
14 | long * keep_out_flat = THLongTensor_data(keep_out);
15 | float * boxes_flat = THFloatTensor_data(boxes);
16 | long * order_flat = THLongTensor_data(order);
17 | float * areas_flat = THFloatTensor_data(areas);
18 |
19 | THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 | THByteTensor_fill(suppressed, 0);
21 | unsigned char * suppressed_flat = THByteTensor_data(suppressed);
22 |
23 | // nominal indices
24 | int i, j;
25 | // sorted indices
26 | int _i, _j;
27 | // temp variables for box i's (the box currently under consideration)
28 | float ix1, iy1, ix2, iy2, iarea;
29 | // variables for computing overlap with box j (lower scoring box)
30 | float xx1, yy1, xx2, yy2;
31 | float w, h;
32 | float inter, ovr;
33 |
34 | long num_to_keep = 0;
35 | for (_i=0; _i < boxes_num; ++_i) {
36 | i = order_flat[_i];
37 | if (suppressed_flat[i] == 1) {
38 | continue;
39 | }
40 | keep_out_flat[num_to_keep++] = i;
41 | ix1 = boxes_flat[i * boxes_dim];
42 | iy1 = boxes_flat[i * boxes_dim + 1];
43 | ix2 = boxes_flat[i * boxes_dim + 2];
44 | iy2 = boxes_flat[i * boxes_dim + 3];
45 | iarea = areas_flat[i];
46 | for (_j = _i + 1; _j < boxes_num; ++_j) {
47 | j = order_flat[_j];
48 | if (suppressed_flat[j] == 1) {
49 | continue;
50 | }
51 | xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 | yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 | xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 | yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 | w = fmaxf(0.0, xx2 - xx1 + 1);
56 | h = fmaxf(0.0, yy2 - yy1 + 1);
57 | inter = w * h;
58 | ovr = inter / (iarea + areas_flat[j] - inter);
59 | if (ovr >= nms_overlap_thresh) {
60 | suppressed_flat[j] = 1;
61 | }
62 | }
63 | }
64 |
65 | long *num_out_flat = THLongTensor_data(num_out);
66 | *num_out_flat = num_to_keep;
67 | THByteTensor_free(suppressed);
68 | return 1;
69 | }
--------------------------------------------------------------------------------
/functions/predict_bbox.py:
--------------------------------------------------------------------------------
1 | #encoding:utf8
2 | from utils import bbox_helper
3 | from extensions import nms
4 | import torch
5 | import logging
6 | import numpy as np
def to_np_array(x):
    """Convert a torch tensor to a numpy array; pass anything else through.

    :param x: tensor, None, or any other object
    :return: numpy array for tensors, otherwise ``x`` itself (None stays None)
    """
    if x is None:
        return None
    if not torch.is_tensor(x):
        return x
    # Move to host memory first so the numpy conversion also works for
    # tensors that live on another device.
    host_tensor = x.cpu()
    return host_tensor.data.numpy()
12 |
def compute_predicted_bboxes(rois, pred_cls, pred_loc, image_info, cfg):
    '''
    Decode per-class box predictions and run per-image, per-class NMS.

    :param cfg: config; reads 'bbox_normalize_stats_precomputed',
        'bbox_normalize_stds', 'bbox_normalize_means', 'score_thresh',
        'nms_iou_thresh' and 'top_n'
    :param rois: [N, k] k>=5, batch_ix, x1, y1, x2, y2
    :param pred_cls:[N, num_classes, 1, 1]
    :param pred_loc:[N, num_classes * 4, 1, 1]
    :param image_info:[N, 3]
    :return: bboxes: [M, 7], batch_ix, x1, y1, x2, y2, score, cls
        (float CUDA tensor; M == 0 when nothing survives)
    '''
    rois, pred_cls, pred_loc = map(to_np_array, [rois, pred_cls, pred_loc])
    N, num_classes = pred_cls.shape[0:2]
    assert(N == rois.shape[0])
    if N == 0:
        # max() below would raise on an empty batch-index column.
        return torch.zeros((0, 7)).float().cuda()
    B = max(rois[:, 0].astype(np.int32))+1
    nmsed_bboxes = []
    # class 0 is skipped (the loop starts at 1)
    for cls in range(1, num_classes):
        # reshape instead of squeeze: squeeze would also drop the N axis
        # when N == 1 and break the 2-D indexing below.
        scores = pred_cls[:, cls].reshape(N)
        deltas = pred_loc[:, cls*4:cls*4+4].reshape(N, 4)
        if cfg['bbox_normalize_stats_precomputed']:
            deltas = deltas * np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]\
                + np.array(cfg['bbox_normalize_means'])[np.newaxis, :]
        bboxes = bbox_helper.compute_loc_bboxes(rois[:,1:1+4], deltas)
        bboxes = np.hstack([bboxes, scores[:, np.newaxis]])
        # for each image, do nms
        for b_ix in range(B):
            rois_ix = np.where(rois[:, 0] == b_ix)[0]
            pre_scores = scores[rois_ix]
            pre_bboxes = bboxes[rois_ix]
            pre_bboxes[:, :4] = bbox_helper.clip_bbox(pre_bboxes[:,:4], image_info[b_ix])
            if cfg['score_thresh'] > 0:
                keep_ix = np.where(pre_scores > cfg['score_thresh'])[0]
                pre_scores = pre_scores[keep_ix]
                pre_bboxes = pre_bboxes[keep_ix]
            if pre_scores.size == 0: continue
            # nms expects boxes sorted by descending score
            order = pre_scores.argsort()[::-1]
            pre_bboxes = pre_bboxes[order, :]
            keep_index = nms(torch.from_numpy(pre_bboxes).float().cuda(), cfg['nms_iou_thresh']).numpy()
            post_bboxes = pre_bboxes[keep_index]
            batch_ix = np.full(post_bboxes.shape[0], b_ix)
            batch_cls = np.full(post_bboxes.shape[0], cls)
            post_bboxes = np.hstack([batch_ix[:, np.newaxis], post_bboxes, batch_cls[:, np.newaxis]])
            nmsed_bboxes.append(post_bboxes)
    if not nmsed_bboxes:
        # np.vstack raises on an empty list; report "no detections" instead.
        return torch.zeros((0, 7)).float().cuda()
    nmsed_bboxes = np.vstack(nmsed_bboxes)
    if cfg['top_n'] > 0:
        # keep only the top_n highest scoring boxes of each image
        top_n_bboxes = []
        for b_ix in range(B):
            bboxes = nmsed_bboxes[nmsed_bboxes[:, 0] == b_ix]
            scores = bboxes[:, -2]
            order = scores.argsort()[::-1][:cfg['top_n']]
            bboxes = bboxes[order]
            top_n_bboxes.append(bboxes)
        nmsed_bboxes = np.vstack(top_n_bboxes)
    nmsed_bboxes = (torch.from_numpy(nmsed_bboxes)).float().cuda()
    return nmsed_bboxes
67 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "roi_pooling_kernel.h"
4 |
5 | extern THCState *state;
6 |
7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
9 | {
10 | // Grab the input tensor
11 | float * data_flat = THCudaTensor_data(state, features);
12 | float * rois_flat = THCudaTensor_data(state, rois);
13 |
14 | float * output_flat = THCudaTensor_data(state, output);
15 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 |
17 | // Number of ROIs
18 | int num_rois = THCudaTensor_size(state, rois, 0);
19 | int size_rois = THCudaTensor_size(state, rois, 1);
20 | if (size_rois != 5)
21 | {
22 | return 0;
23 | }
24 |
25 | // batch size
26 | // int batch_size = THCudaTensor_size(state, features, 0);
27 | // if (batch_size != 1)
28 | // {
29 | // return 0;
30 | // }
31 | // data height
32 | int data_height = THCudaTensor_size(state, features, 2);
33 | // data width
34 | int data_width = THCudaTensor_size(state, features, 3);
35 | // Number of channels
36 | int num_channels = THCudaTensor_size(state, features, 1);
37 |
38 | cudaStream_t stream = THCState_getCurrentStream(state);
39 |
40 | ROIPoolForwardLaucher(
41 | data_flat, spatial_scale, num_rois, data_height,
42 | data_width, num_channels, pooled_height,
43 | pooled_width, rois_flat,
44 | output_flat, argmax_flat, stream);
45 |
46 | return 1;
47 | }
48 |
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 | // Grab the input tensor
53 | float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 | float * rois_flat = THCudaTensor_data(state, rois);
55 |
56 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 | int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 |
59 | // Number of ROIs
60 | int num_rois = THCudaTensor_size(state, rois, 0);
61 | int size_rois = THCudaTensor_size(state, rois, 1);
62 | if (size_rois != 5)
63 | {
64 | return 0;
65 | }
66 |
67 | // batch size
68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 | // if (batch_size != 1)
70 | // {
71 | // return 0;
72 | // }
73 | // data height
74 | int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 | // data width
76 | int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 | // Number of channels
78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 |
80 | cudaStream_t stream = THCState_getCurrentStream(state);
81 | ROIPoolBackwardLaucher(
82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 | data_width, num_channels, pooled_height,
84 | pooled_width, rois_flat,
85 | bottom_grad_flat, argmax_flat, stream);
86 |
87 | return 1;
88 | }
89 |
--------------------------------------------------------------------------------
/extensions/_nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | #include
12 | #include
13 | #include
14 | #include "nms_kernel.h"
15 |
// Intersection-over-union of two boxes. Elements 0..3 of each pointer are read
// as x1, y1, x2, y2; widths and heights are measured inclusively (+1).
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float x_lo = fmaxf(a[0], b[0]);
  const float x_hi = fminf(a[2], b[2]);
  const float y_lo = fmaxf(a[1], b[1]);
  const float y_hi = fminf(a[3], b[3]);

  // Clamp at zero so disjoint boxes yield an empty intersection.
  const float overlap_w = fmaxf(x_hi - x_lo + 1, 0.f);
  const float overlap_h = fmaxf(y_hi - y_lo + 1, 0.f);
  const float overlap = overlap_w * overlap_h;

  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return overlap / (area_a + area_b - overlap);
}
25 |
// Computes one tile of the pairwise suppression mask. blockIdx.y selects the
// tile of "row" boxes and blockIdx.x the tile of "column" boxes; each thread
// owns one row box and writes a 64-bit word whose bit i is set when its IoU
// with column box i of the tile exceeds nms_overlap_thresh. Boxes are read
// with a stride of 5 floats.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  const int tile_row = blockIdx.y;
  const int tile_col = blockIdx.x;

  // Number of valid boxes in this row/column tile (the last tile may be short).
  const int rows_here =
        fminf(n_boxes - tile_row * threadsPerBlock, threadsPerBlock);
  const int cols_here =
        fminf(n_boxes - tile_col * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory, one box per thread.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < cols_here) {
    const float *src = dev_boxes + (threadsPerBlock * tile_col + threadIdx.x) * 5;
    float *dst = block_boxes + threadIdx.x * 5;
    for (int k = 0; k < 5; ++k) {
      dst[k] = src[k];
    }
  }
  __syncthreads();

  // No further barriers follow, so idle threads can leave here.
  if (threadIdx.x >= rows_here) {
    return;
  }

  const int cur_box_idx = threadsPerBlock * tile_row + threadIdx.x;
  const float *cur_box = dev_boxes + cur_box_idx * 5;

  // On the diagonal tile only compare against boxes after the current one.
  const int first = (tile_row == tile_col) ? threadIdx.x + 1 : 0;

  unsigned long long bits = 0;
  for (int i = first; i < cols_here; i++) {
    if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
      bits |= 1ULL << i;
    }
  }

  const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
  dev_mask[cur_box_idx * col_blocks + tile_col] = bits;
}
71 |
72 |
// Host entry point: launches nms_kernel over a square grid of
// DIVUP(boxes_num, threadsPerBlock) x DIVUP(boxes_num, threadsPerBlock) blocks
// with threadsPerBlock threads each. boxes_dev and mask_dev are passed straight
// to the kernel, so they must be device pointers.
void _nms(int boxes_num, float * boxes_dev,
          unsigned long long * mask_dev, float nms_overlap_thresh) {

  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
              DIVUP(boxes_num, threadsPerBlock));
  dim3 threads(threadsPerBlock);
  // The launch needs an explicit execution configuration; `blocks` and
  // `threads` above exist solely to provide it.
  nms_kernel<<<blocks, threads>>>(boxes_num,
                                  nms_overlap_thresh,
                                  boxes_dev,
                                  mask_dev);
}
84 |
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 |
--------------------------------------------------------------------------------
/functions/rpn_proposal.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 | from utils import bbox_helper
3 | from utils import anchor_helper
4 | from extensions import nms
5 | import torch
6 | import torch.nn.functional as F
7 | import numpy as np
8 | import logging
9 | logger = logging.getLogger('global')
10 |
def to_np_array(x):
    """Return ``x`` as a numpy array when it is a torch tensor.

    None and non-tensor inputs are returned unchanged.
    """
    if x is not None and torch.is_tensor(x):
        # copy to host memory before converting
        return x.cpu().data.numpy()
    return x
16 |
def compute_rpn_proposals(conv_cls, conv_loc, cfg, image_info):
    '''
    Turn RPN conv outputs into per-image, NMS-filtered proposals.

    :argument
        cfg: configs; reads 'anchor_ratios', 'anchor_scales', 'anchor_stride',
            'pre_nms_top_n', 'post_nms_top_n', 'roi_min_size', 'nms_iou_thresh'
        conv_cls: FloatTensor, [batch, num_anchors * x, h, w], conv output of classification
        conv_loc: FloatTensor, [batch, num_anchors * 4, h, w], conv output of localization
        image_info: FloatTensor, [batch, 3], image size
    :returns
        proposals: FloatTensor, 2-dim; each row is batch_ix followed by the
            box columns and the score (6 columns when the decoded boxes have
            4 columns: batch_ix, x1, y1, x2, y2, score)
    '''

    batch_size, num_anchors_4, featmap_h, featmap_w = conv_loc.shape
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(featmap_h, featmap_w,
        cfg['anchor_ratios'], cfg['anchor_scales'], cfg['anchor_stride'])
    B = batch_size
    A = num_anchors_4 // 4
    assert(A * 4 == num_anchors_4)
    K = featmap_h * featmap_w

    # channels-last so every anchor's values are contiguous: [B, K*A, -1]
    cls_view = conv_cls.permute(0, 2, 3, 1).contiguous().view(B, K*A, -1).cpu().numpy()
    loc_view = conv_loc.permute(0, 2, 3, 1).contiguous().view(B, K*A, 4).cpu().numpy()
    if torch.is_tensor(image_info):
        image_info = image_info.cpu().numpy()

    batch_proposals = []
    pre_nms_top_n = cfg['pre_nms_top_n']
    for b_ix in range(B):
        scores = cls_view[b_ix, :, -1]  # to compatible with sigmoid
        # '>=': np.argpartition requires kth < len(scores), so asking for
        # exactly as many (or more) candidates as exist falls back to a
        # full sort, which selects the same set.
        if pre_nms_top_n <= 0 or pre_nms_top_n >= scores.shape[0]:
            order = scores.argsort()[::-1]
        else:
            # pick the top-k first, then sort only those k by score
            inds = np.argpartition(-scores, pre_nms_top_n)[:pre_nms_top_n]
            order = np.argsort(-scores[inds])
            order = inds[order]
        loc_delta = loc_view[b_ix, order, :]
        loc_anchors = anchors_overplane[order, :]
        scores = scores[order]
        boxes = bbox_helper.compute_loc_bboxes(loc_anchors, loc_delta)
        boxes = bbox_helper.clip_bbox(boxes, image_info[b_ix])
        proposals = np.hstack([boxes, scores[:, np.newaxis]])
        # drop boxes narrower or shorter than roi_min_size (inclusive size)
        proposals = proposals[(proposals[:, 2] - proposals[:, 0] + 1 >= cfg['roi_min_size'])
                            & (proposals[:, 3] - proposals[:, 1] + 1 >= cfg['roi_min_size'])]
        if proposals.shape[0] == 0:
            # nothing left for this image; do not hand an empty tensor to nms
            continue
        keep_index = nms(torch.from_numpy(proposals).float().cuda(), cfg['nms_iou_thresh']).numpy()
        if cfg['post_nms_top_n'] > 0:
            keep_index = keep_index[:cfg['post_nms_top_n']]
        proposals = proposals[keep_index]
        batch_ix = np.full(keep_index.shape, b_ix)
        proposals = np.hstack([batch_ix[:, np.newaxis], proposals])
        batch_proposals.append(proposals)
    if not batch_proposals:
        # np.vstack raises on an empty list; return an empty proposal table.
        # NOTE(review): 6 columns assumes compute_loc_bboxes yields 4-column boxes.
        return torch.zeros((0, 6)).float()
    batch_proposals = (torch.from_numpy(np.vstack(batch_proposals))).float()
    if batch_proposals.dim() < 2:
        # unsqueeze is not in-place; the result has to be assigned
        batch_proposals = batch_proposals.unsqueeze(dim=0)
    return batch_proposals
75 |
--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/cuda/iou_overlap_kernel.cu:
--------------------------------------------------------------------------------
1 | // #ifdef __cplusplus
2 | // extern "C" {
3 | // #endif
4 |
5 | #include
6 | #include
7 | #include
8 | #include "iou_overlap_kernel.h"
9 |
10 |
// Ceiling division of m by n for positive integers: the quotient plus one
// when there is a remainder.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
12 |
// For-loop header over [0, n): each thread starts at its global 1-D index and
// advances by the total number of launched threads. `n` is parenthesised so
// that expression arguments (e.g. `a + b`, `c ? x : y`) expand correctly.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)
16 |
// CUDA: grid stride looping
// Expands to a for-loop header: `i` starts at this thread's global 1-D index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total number of
// launched threads (blockDim.x * gridDim.x) until it reaches `n`.
#define CUDA_KERNEL_LOOP(i, n) \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
       i < (n); \
       i += blockDim.x * gridDim.x)
22 |
23 | //__device__ inline float devIoU(float const * const a, float const * const b) {
24 | // float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
25 | // float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
26 | // float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f);
27 | // float interS = width * height;
28 | // float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
29 | // float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
30 | // return interS / (Sa + Sb - interS);
31 | //}
32 |
// Pair-wise IoU between two box sets.
//   bbox1, bbox2 : flat box arrays; each box occupies `size_bbox` floats and
//                  its first four floats are read as x1, y1, x2, y2
//   top_data     : output, written at [b1 * num_bbox2 + b2]
// Areas are computed without a +1 pixel offset and the union is clamped to a
// minimum of 1 before dividing.
__global__ void IOUOverlapKernel(
    const float* bbox1,
    const float* bbox2,
    const int size_bbox,
    const int num_bbox1,
    const int num_bbox2,
    float* top_data){
  CUDA_KERNEL_LOOP(index, num_bbox1 * num_bbox2){
    const int row = index / num_bbox2;   // box index into bbox1
    const int col = index % num_bbox2;   // box index into bbox2

    const float* p = bbox1 + row * size_bbox;
    const float* q = bbox2 + col * size_bbox;

    const float p_area = (p[2] - p[0]) * (p[3] - p[1]);
    const float q_area = (q[2] - q[0]) * (q[3] - q[1]);

    // Intersection rectangle; negative extents collapse to zero.
    const float ix1 = fmaxf(p[0], q[0]);
    const float ix2 = fminf(p[2], q[2]);
    const float iy1 = fmaxf(p[1], q[1]);
    const float iy2 = fminf(p[3], q[3]);
    const float iw = fmaxf(ix2 - ix1, 0.f);
    const float ih = fmaxf(iy2 - iy1, 0.f);
    const float inter = iw * ih;

    const float uni = fmaxf(p_area + q_area - inter, 1.0);
    top_data[row * num_bbox2 + col] = inter / uni;
  }
}
66 |
// Host-side launcher for IOUOverlapKernel on `stream`.
// Writes num_bbox1 * num_bbox2 IoU values into `top_data`.
// Returns 1 on success; on any CUDA error it prints a message and exits the
// process (existing behaviour, kept).
int IOUOverlap(
    const float* bboxes1_data,
    const float* bboxes2_data,
    const int size_bbox,
    const int num_bbox1,
    const int num_bbox2,
    float* top_data,
    cudaStream_t stream){
  const int kThreadsPerBlock = 1024;
  int output_size = num_bbox1 * num_bbox2;
  cudaError_t err;

  // Surface any error left pending by earlier CUDA calls before launching.
  err = cudaGetLastError();
  if(cudaSuccess != err)
  {
    fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
             __LINE__, cudaGetErrorString( err ) );
    exit( -1 );
  }

  // Nothing to compute: a launch with a grid of 0 blocks is an invalid
  // configuration and would otherwise terminate the process below.
  if (output_size <= 0)
  {
    return 1;
  }

  const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
  IOUOverlapKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      bboxes1_data, bboxes2_data, size_bbox, num_bbox1, num_bbox2, top_data);

  err = cudaGetLastError();
  if(cudaSuccess != err)
  {
    fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
             __LINE__, cudaGetErrorString( err ) );
    exit( -1 );
  }

  return 1;
}
101 |
102 | // #ifdef __cplusplus
103 | // }
104 | // #endif
105 |
--------------------------------------------------------------------------------
/utils/anchor_helper.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 | import numpy as np
3 |
def get_anchors_over_grid(ratios, scales, stride):
    """
    Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, stride-1, stride-1) window.

    :argument
        ratios: aspect ratios to enumerate
        scales: anchor scales, in units of ``stride``
        stride: feature-map stride in pixels
    :returns
        anchors produced by ``generate_anchors`` as x1, y1, x2, y2 rows

    NOTE: ``ratios`` used to be ignored (``generate_anchors`` fell back to its
    default (0.5, 1, 2)); it is now forwarded, which is identical for configs
    that already use those default ratios.
    """
    sizes = np.array(scales) * stride
    return generate_anchors(stride=stride, sizes=sizes, aspect_ratios=ratios)
20 |
def get_anchors_over_plane(featmap_h, featmap_w, anchor_ratios, anchor_scales, anchor_stride):
    """Tile the per-cell anchors over every cell of a feature map.

    Returns an array of shape [K * A, 4] where K = featmap_h * featmap_w and
    A is the number of anchors per cell; rows are grouped cell by cell.
    """
    # Anchors for a single grid cell.
    base_anchors = get_anchors_over_grid(anchor_ratios, anchor_scales, anchor_stride)

    # Pixel offsets of every cell, laid out as a [featmap_h, featmap_w] grid.
    xs = np.arange(0, featmap_w) * anchor_stride
    ys = np.arange(0, featmap_h) * anchor_stride
    grid_x, grid_y = np.meshgrid(xs, ys)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    # One (dx, dy, dx, dy) row per cell so x1/x2 and y1/y2 shift together.
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    num_anchors = base_anchors.shape[0]
    num_cells = shifts.shape[0]
    # Broadcast [1, A, 4] + [K, 1, 4] -> [K, A, 4].
    tiled = base_anchors.reshape((1, num_anchors, 4)) + shifts[:, np.newaxis, :]
    return tiled.reshape((num_cells * num_anchors, 4))
36 |
def generate_anchors(
    stride=16, sizes=(32, 64), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.
    """
    # np.float was only an alias of the builtin float (float64) and has been
    # removed from NumPy; np.float64 is the equivalent explicit dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64)
    )
def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    # np.float64 replaces the removed np.float alias (same dtype).
    anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    # For each ratio-specific anchor, enumerate all scales and stack the rows.
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors
def _ratio_enum(anchor, ratios):
    """Build one anchor per aspect ratio, all sharing ``anchor``'s center."""
    width, height, cx, cy = _whctrs(anchor)
    # Divide the reference area by each ratio, then derive rounded widths and
    # the matching rounded heights.
    area_per_ratio = (width * height) / ratios
    ratio_ws = np.round(np.sqrt(area_per_ratio))
    ratio_hs = np.round(ratio_ws * ratios)
    return _mkanchors(ratio_ws, ratio_hs, cx, cy)
def _scale_enum(anchor, scales):
    """Build one anchor per scale, all sharing ``anchor``'s center."""
    width, height, cx, cy = _whctrs(anchor)
    # Width and height are multiplied by the same factor.
    return _mkanchors(width * scales, height * scales, cx, cy)
75 | def _whctrs(anchor):
76 | """Return width, height, x center, and y center for an anchor (window)."""
77 | w = anchor[2] - anchor[0] + 1
78 | h = anchor[3] - anchor[1] + 1
79 | x_ctr = anchor[0] + 0.5 * (w - 1)
80 | y_ctr = anchor[1] + 0.5 * (h - 1)
81 | return w, h, x_ctr, y_ctr
82 | def _mkanchors(ws, hs, x_ctr, y_ctr):
83 | """Given a vector of widths (ws) and heights (hs) around a center
84 | (x_ctr, y_ctr), output a set of anchors (windows).
85 | """
86 | ws = ws[:, np.newaxis]
87 | hs = hs[:, np.newaxis]
88 | anchors = np.hstack(
89 | (
90 | x_ctr - 0.5 * (ws - 1),
91 | y_ctr - 0.5 * (hs - 1),
92 | x_ctr + 0.5 * (ws - 1),
93 | y_ctr + 0.5 * (hs - 1)
94 | )
95 | )
96 | return anchors
97 |
98 |
99 |
--------------------------------------------------------------------------------
/datasets/pycocotools/common/gason.h:
--------------------------------------------------------------------------------
1 | // https://github.com/vivkin/gason - pulled January 10, 2016
2 | #pragma once
3 |
4 | #include
5 | #include
6 | #include
7 |
// Type tag stored inside a JsonValue. JSON_NUMBER is 0 and the following
// enumerators count up from it; JSON_NULL is pinned to 0xF, the largest value
// that fits in JSON_VALUE_TAG_MASK.
enum JsonTag {
    JSON_NUMBER = 0,
    JSON_STRING,
    JSON_ARRAY,
    JSON_OBJECT,
    JSON_TRUE,
    JSON_FALSE,
    JSON_NULL = 0xF
};
17 |
18 | struct JsonNode;
19 |
// Bit layout used by JsonValue to pack a tag and a pointer into 64 bits:
//   PAYLOAD_MASK : the low 47 bits, holding the pointer payload
//   NAN_MASK     : bits that are always set on a tagged (non-double) value
//   TAG_MASK     : 4-bit mask applied to the tag after shifting
//   TAG_SHIFT    : bit position of the tag, directly above the payload
#define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
#define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
#define JSON_VALUE_TAG_MASK 0xF
#define JSON_VALUE_TAG_SHIFT 47
24 |
// A JSON value packed into 8 bytes: either a plain double (fval) or a tagged
// payload encoded in the same bits (ival) using the JSON_VALUE_* masks.
union JsonValue {
    uint64_t ival;
    double fval;

    // Store a number directly as a double.
    JsonValue(double x)
        : fval(x) {
    }
    // Store a tagged value: NaN mask | tag shifted above the payload | payload.
    // The payload pointer must fit in the bits covered by the payload mask.
    JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
        assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
        ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
    }
    // True when the bits, compared as a signed 64-bit integer, do not exceed
    // the NaN mask; such a value is treated as a plain double.
    bool isDouble() const {
        return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
    }
    // JSON_NUMBER for doubles, otherwise the tag extracted from the bits.
    JsonTag getTag() const {
        return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
    }
    // Payload bits of a tagged value; asserts the value is not a double.
    uint64_t getPayload() const {
        assert(!isDouble());
        return ival & JSON_VALUE_PAYLOAD_MASK;
    }
    // Numeric value; asserts the tag is JSON_NUMBER.
    double toNumber() const {
        assert(getTag() == JSON_NUMBER);
        return fval;
    }
    // Payload reinterpreted as a char pointer; asserts the tag is JSON_STRING.
    char *toString() const {
        assert(getTag() == JSON_STRING);
        return (char *)getPayload();
    }
    // Payload reinterpreted as the first JsonNode; asserts the tag is
    // JSON_ARRAY or JSON_OBJECT.
    JsonNode *toNode() const {
        assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
        return (JsonNode *)getPayload();
    }
};
59 |
// Singly linked list element: `next` links to the following node and
// JsonIterator walks the list through it.
struct JsonNode {
    JsonValue value;  // the value held by this node
    JsonNode *next;   // following node; iteration stops at nullptr
    char *key;        // presumably the member name for object entries -- TODO confirm against the parser
};
65 |
// Minimal forward iterator over a JsonNode list, providing the operators a
// range-based for loop needs (++, !=, *), plus ->.
struct JsonIterator {
    JsonNode *p;  // current node, or nullptr at the end

    // Advance to the next node in the list.
    void operator++() {
        p = p->next;
    }
    // Iterators differ when they point at different nodes.
    bool operator!=(const JsonIterator &x) const {
        return p != x.p;
    }
    // Dereferencing yields the node pointer itself, not a reference.
    JsonNode *operator*() const {
        return p;
    }
    JsonNode *operator->() const {
        return p;
    }
};
82 |
// Iterator to the first node of `o`; toNode() asserts that `o` is tagged
// JSON_ARRAY or JSON_OBJECT.
inline JsonIterator begin(JsonValue o) {
    return JsonIterator{o.toNode()};
}
// End sentinel: an iterator holding nullptr, regardless of the argument.
inline JsonIterator end(JsonValue) {
    return JsonIterator{nullptr};
}
89 |
// X-macro table of parser status codes: each entry pairs an identifier with a
// message string. Callers pass a macro XX(no, str) to expand the table.
#define JSON_ERRNO_MAP(XX)                           \
    XX(OK, "ok")                                     \
    XX(BAD_NUMBER, "bad number")                     \
    XX(BAD_STRING, "bad string")                     \
    XX(BAD_IDENTIFIER, "bad identifier")             \
    XX(STACK_OVERFLOW, "stack overflow")             \
    XX(STACK_UNDERFLOW, "stack underflow")           \
    XX(MISMATCH_BRACKET, "mismatch bracket")         \
    XX(UNEXPECTED_CHARACTER, "unexpected character") \
    XX(UNQUOTED_KEY, "unquoted key")                 \
    XX(BREAKING_BAD, "breaking bad")                 \
    XX(ALLOCATION_FAILURE, "allocation failure")
102 |
// Status codes generated from JSON_ERRNO_MAP: one JSON_<name> enumerator per
// table entry, in table order (so JSON_OK is 0).
enum JsonErrno {
#define XX(no, str) JSON_##no,
    JSON_ERRNO_MAP(XX)
#undef XX
};
108 |
109 | const char *jsonStrError(int err);
110 |
// Move-only owner of a linked list of allocation zones. The destructor calls
// deallocate(), so whatever `head` points to is released with the object.
class JsonAllocator {
    struct Zone {
        Zone *next;
        size_t used;
    } *head = nullptr;

public:
    JsonAllocator() = default;
    JsonAllocator(const JsonAllocator &) = delete;
    JsonAllocator &operator=(const JsonAllocator &) = delete;
    // Move construction steals the zone list and leaves the source empty.
    JsonAllocator(JsonAllocator &&x) : head(x.head) {
        x.head = nullptr;
    }
    // Move assignment: release the zones this object already owns before
    // taking over x's list (previously they were leaked), and leave the
    // object untouched on self-assignment (previously head was reset to
    // nullptr, dropping the zones).
    JsonAllocator &operator=(JsonAllocator &&x) {
        if (this != &x) {
            deallocate();
            head = x.head;
            x.head = nullptr;
        }
        return *this;
    }
    ~JsonAllocator() {
        deallocate();
    }
    void *allocate(size_t size);
    void deallocate();
};
135 |
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 |
--------------------------------------------------------------------------------
/functions/proposal_assign.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import logging
4 | #from utils.timer import Timer
5 |
def to_np_array(x):
    """Convert ``x`` to a NumPy array.

    ``None`` is passed through; torch tensors are moved to the CPU and
    converted; anything else goes through ``np.array``.
    """
    if x is None:
        return None
    if torch.is_tensor(x):
        return x.cpu().data.numpy()
    return np.array(x)
11 |
def get_rois_target_levels(levels, base_scale, base_level, rois):
    '''Pick, for every RoI, the feature level it should be pooled from.

    The level is floor(base_level + log2(sqrt(w * h) / base_scale)), clipped
    to the range spanned by ``levels``.
    Args:
        rois: [R, 5], batch_ix,x1,y1,x2,y2
        levels: [L], levels. e.g.[2,3,4,5,6]
    Returns:
        int32 array [R] of level ids
    '''
    rois = to_np_array(rois)
    widths = rois[:, 3] - rois[:, 1] + 1
    heights = rois[:, 4] - rois[:, 2] + 1
    roi_scale = (widths * heights)**0.5
    eps = 1e-6  # keeps log2 finite when the scale ratio is 0
    raw_levels = np.floor(base_level + np.log2(roi_scale/base_scale+eps)).astype(np.int32)
    lowest, highest = min(levels), max(levels)
    return np.clip(raw_levels, lowest, highest)
26 |
def get_rois_by_level(levels, base_scale, base_level, rois):
    '''Split RoIs by their target level.

    Returns two lists aligned with ``levels``: the RoI rows assigned to each
    level, and the indices of those rows in the input.
    '''
    rois = to_np_array(rois)
    assigned = get_rois_target_levels(levels, base_scale, base_level, rois)
    # Row indices of the RoIs that landed on each level, in ``levels`` order.
    rois_ix_by_level = [np.where(assigned == lvl)[0] for lvl in levels]
    rois_by_level = [rois[ix] for ix in rois_ix_by_level]
    return rois_by_level, rois_ix_by_level
36 |
def assign_args_by_level(levels, base_scale, base_level, rois, *args):
    '''Split RoIs, and any per-RoI arrays, by target level.

    Args:
        rois: [R, 5], batch_ix,x1,y1,x2,y2
        levels: [L], levels. e.g.[2,3,4,5,6]
        *args: extra per-RoI arrays, indexed along their first dimension
    return:
        list whose first entry is the RoIs by level, followed by one by-level
        list per extra argument, in the order given
    '''
    rois = to_np_array(rois)
    rois_by_level, rois_ix_by_level = \
        get_rois_by_level(levels, base_scale, base_level, rois)

    args_by_level = [rois_by_level]
    for extra in args:
        extra = to_np_array(extra)
        # Reuse the RoI index groups so every argument is split the same way.
        args_by_level.append([extra[ix] for ix in rois_ix_by_level])
    return args_by_level
59 |
def get_proposals_assign(proposals, base_scale=224, layer_index=4):
    '''
    Split proposals into four groups by k = floor(layer_index + log2(sqrt(w*h) / base_scale)).

    :argument
        proposals: [N, k], k>=5, batch_idx, x1, y1, x2, y2
        base_scale: base scale
        layer_index: the layer an RoI with wxh=224x224 should be mapped into
    :returns
        p2, p3, p4, p5: proposal rows with k <= 2, k == 3, k == 4 and k >= 5
    '''
    boxes = to_np_array(proposals)
    widths = boxes[:,3] - boxes[:,1] + 1
    heights = boxes[:,4] - boxes[:,2] + 1
    side = (widths*heights)**0.5
    level = np.floor(layer_index + np.log2(side/base_scale))
    # The two outer groups absorb everything below level 2 / above level 5.
    return boxes[level <= 2], boxes[level == 3], boxes[level == 4], boxes[level >= 5]
81 |
def get_rois_assign(rois, cls_targets, loc_targets, loc_weights, base_scale=224, layer_index=4):
    '''
    Regroup RoIs and their training targets by level
    k = floor(layer_index + log2(sqrt(w*h) / base_scale)), using the four
    groups k <= 2, k == 3, k == 4 and k >= 5.

    :argument
        rois: [R, >=5] array, batch_ix, x1, y1, x2, y2
        cls_targets, loc_targets, loc_weights: per-RoI arrays indexed like rois
    :returns
        roi_new: list of four RoI arrays (one per group, possibly empty)
        cls_ts: CUDA LongTensor, class targets concatenated in group order
        loc_ts, loc_ws: CUDA FloatTensors, stacked in the same order
    '''
    w = rois[:,3] - rois[:,1] + 1
    h = rois[:,4] - rois[:,2] + 1
    area = (w*h)**0.5
    k = np.floor(layer_index + np.log2(area/base_scale))
    level_masks = [k <= 2, k == 3, k == 4, k >= 5]

    roi_new = []
    cls_t_new = []
    loc_t_new = []
    loc_w_new = []
    for mask in level_masks:
        roi_new.append(rois[mask])
        # Targets are only collected for non-empty groups, so the
        # concatenated targets line up with the non-empty RoI groups.
        if mask.any():
            cls_t_new.append(cls_targets[mask])
            loc_t_new.append(loc_targets[mask])
            loc_w_new.append(loc_weights[mask])

    # NOTE: the unused `rois.device` lookup was dropped; ndarrays have no such
    # attribute on older NumPy, so it could only raise.
    to_cuda = lambda x: (torch.from_numpy(x)).cuda()
    cls_ts = to_cuda(np.concatenate(cls_t_new)).long()
    loc_ts = to_cuda(np.vstack(loc_t_new)).float()
    loc_ws = to_cuda(np.vstack(loc_w_new)).float()
    return roi_new, cls_ts, loc_ts, loc_ws
119 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/focal_loss_cuda.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "cuda/focal_loss_sigmoid_kernel.h"
6 | #include "cuda/focal_loss_softmax_kernel.h"
7 |
8 | extern THCState *state;
9 |
10 | int focal_loss_sigmoid_forward_cuda(
11 | int N,
12 | THCudaTensor * logits,
13 | THCudaIntTensor * targets,
14 | float weight_pos,
15 | float gamma,
16 | float alpha,
17 | int num_classes,
18 | THCudaTensor * losses){
19 | // Grab the input tensor
20 | float * logits_flat = THCudaTensor_data(state, logits);
21 | int * targets_flat = THCudaIntTensor_data(state, targets);
22 |
23 | float * losses_flat = THCudaTensor_data(state, losses);
24 |
25 | cudaStream_t stream = THCState_getCurrentStream(state);
26 |
27 | SigmoidFocalLossForwardLaucher(
28 | N, logits_flat, targets_flat, weight_pos,
29 | gamma, alpha, num_classes, losses_flat, stream);
30 |
31 | return 1;
32 | }
33 |
34 | int focal_loss_sigmoid_backward_cuda(
35 | int N,
36 | THCudaTensor * logits,
37 | THCudaIntTensor * targets,
38 | THCudaTensor * dX_data,
39 | float weight_pos,
40 | float gamma,
41 | float alpha,
42 | int num_classes){
43 | // Grab the input tensor
44 | float * logits_flat = THCudaTensor_data(state, logits);
45 | int * targets_flat = THCudaIntTensor_data(state, targets);
46 |
47 | float * dX_data_flat = THCudaTensor_data(state, dX_data);
48 |
49 | cudaStream_t stream = THCState_getCurrentStream(state);
50 | SigmoidFocalLossBackwardLaucher(
51 | N, logits_flat, targets_flat, dX_data_flat,
52 | weight_pos, gamma, alpha, num_classes, stream);
53 |
54 | return 1;
55 | }
56 |
57 | int focal_loss_softmax_forward_cuda(
58 | int N,
59 | THCudaTensor * logits,
60 | THCudaIntTensor * targets,
61 | float weight_pos,
62 | float gamma,
63 | float alpha,
64 | int num_classes,
65 | THCudaTensor * losses,
66 | THCudaTensor * priors){
67 | // Grab the input tensor
68 | float * logits_flat = THCudaTensor_data(state, logits);
69 | int * targets_flat = THCudaIntTensor_data(state, targets);
70 |
71 | float * losses_flat = THCudaTensor_data(state, losses);
72 | float * priors_flat = THCudaTensor_data(state, priors);
73 |
74 | cudaStream_t stream = THCState_getCurrentStream(state);
75 |
76 | SoftmaxFocalLossForwardLaucher(
77 | N, logits_flat, targets_flat, weight_pos,
78 | gamma, alpha, num_classes, losses_flat, priors_flat, stream);
79 |
80 | return 1;
81 | }
82 |
83 | int focal_loss_softmax_backward_cuda(
84 | int N,
85 | THCudaTensor * logits,
86 | THCudaIntTensor * targets,
87 | THCudaTensor * dX_data,
88 | float weight_pos,
89 | float gamma,
90 | float alpha,
91 | int num_classes,
92 | THCudaTensor * priors,
93 | THCudaTensor * buff){
94 | // Grab the input tensor
95 | float * logits_flat = THCudaTensor_data(state, logits);
96 | int * targets_flat = THCudaIntTensor_data(state, targets);
97 |
98 | float * dX_data_flat = THCudaTensor_data(state, dX_data);
99 | float * priors_flat = THCudaTensor_data(state, priors);
100 | float * buff_flat = THCudaTensor_data(state, buff);
101 |
102 | cudaStream_t stream = THCState_getCurrentStream(state);
103 | SoftmaxFocalLossBackwardLaucher(
104 | N, logits_flat, targets_flat, dX_data_flat,
105 | weight_pos, gamma, alpha, num_classes, priors_flat, buff_flat, stream);
106 |
107 | return 1;
108 | }
109 |
--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
1 | #include |
2 | #include
3 |
4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
5 | THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
6 | {
7 | // Grab the input tensor
8 | float * data_flat = THFloatTensor_data(features);
9 | float * rois_flat = THFloatTensor_data(rois);
10 |
11 | float * output_flat = THFloatTensor_data(output);
12 |
13 | // Number of ROIs
14 | int num_rois = THFloatTensor_size(rois, 0);
15 | int size_rois = THFloatTensor_size(rois, 1);
16 | // batch size
17 | int batch_size = THFloatTensor_size(features, 0);
18 | if(batch_size != 1)
19 | {
20 | return 0;
21 | }
22 | // data height
23 | int data_height = THFloatTensor_size(features, 1);
24 | // data width
25 | int data_width = THFloatTensor_size(features, 2);
26 | // Number of channels
27 | int num_channels = THFloatTensor_size(features, 3);
28 |
29 | // Set all element of the output tensor to -inf.
30 | THFloatStorage_fill(THFloatTensor_storage(output), -1);
31 |
32 | // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
33 | int index_roi = 0;
34 | int index_output = 0;
35 | int n;
36 | for (n = 0; n < num_rois; ++n)
37 | {
38 | int roi_batch_ind = rois_flat[index_roi + 0];
39 | int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
40 | int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
41 | int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
42 | int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
43 | // CHECK_GE(roi_batch_ind, 0);
44 | // CHECK_LT(roi_batch_ind, batch_size);
45 |
46 | int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
47 | int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
48 | float bin_size_h = (float)(roi_height) / (float)(pooled_height);
49 | float bin_size_w = (float)(roi_width) / (float)(pooled_width);
50 |
51 | int index_data = roi_batch_ind * data_height * data_width * num_channels;
52 | const int output_area = pooled_width * pooled_height;
53 |
54 | int c, ph, pw;
55 | for (ph = 0; ph < pooled_height; ++ph)
56 | {
57 | for (pw = 0; pw < pooled_width; ++pw)
58 | {
59 | int hstart = (floor((float)(ph) * bin_size_h));
60 | int wstart = (floor((float)(pw) * bin_size_w));
61 | int hend = (ceil((float)(ph + 1) * bin_size_h));
62 | int wend = (ceil((float)(pw + 1) * bin_size_w));
63 |
64 | hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
65 | hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
66 | wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
67 | wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
68 |
69 | const int pool_index = index_output + (ph * pooled_width + pw);
70 | int is_empty = (hend <= hstart) || (wend <= wstart);
71 | if (is_empty)
72 | {
73 | for (c = 0; c < num_channels * output_area; c += output_area)
74 | {
75 | output_flat[pool_index + c] = 0;
76 | }
77 | }
78 | else
79 | {
80 | int h, w, c;
81 | for (h = hstart; h < hend; ++h)
82 | {
83 | for (w = wstart; w < wend; ++w)
84 | {
85 | for (c = 0; c < num_channels; ++c)
86 | {
87 | const int index = (h * data_width + w) * num_channels + c;
88 | if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
89 | {
90 | output_flat[pool_index + c * output_area] = data_flat[index_data + index];
91 | }
92 | }
93 | }
94 | }
95 | }
96 | }
97 | }
98 |
99 | // Increment ROI index
100 | index_roi += size_rois;
101 | index_output += pooled_height * pooled_width * num_channels;
102 | }
103 | return 1;
104 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |
4 | # SCDA
5 |
6 | The project of our work **"Adapting Object Detectors via Selective Cross-Domain Alignment" (CVPR2019)**
7 |
8 | 
9 |
10 |
11 | ## Quick View
12 | Quick view about the paper can be found in this [slide](https://drive.google.com/open?id=1P7gf9dicO1K07l-btBRgG3hZM9ofl0kC)
13 |
14 | ## Installation
15 | #### Requirements
16 | - PyTorch 0.4.1 (also test with 0.4.0)
17 | - torchvision 0.2.1
18 | - OpenCV
19 | - scikit-learn
20 | - Cython
21 | - GCC >= 4.9 (test with 5.4)
22 |
23 | #### Install
24 | 1. cd extensions; ./build_all.sh
25 | 2. cd datasets/pycocotools && make (install pycocotools following its guideline)
26 |
27 | ## Data Preparation
28 | Download the cityscapes and foggy-cityscapes datasets from [cityscapes](https://www.cityscapes-dataset.com/downloads/).
29 |
30 | We provide the meta-files for training and validation, and you can find them at this [url](https://drive.google.com/open?id=1Cv6pLJh0E5elvhhTcXaH6eZXDvN8KfEX). They consist of train.txt, foggy_train.txt and foggy_val.txt. If you want to train with your own datasets, please customize these meta-files for your setup.
31 |
32 | ## Training
33 | We provide several training scripts for our three types of models. Following MMDetection, we use Slurm for distributed training (details can be found [here](https://github.com/open-mmlab/mmdetection/blob/master/tools/slurm_train.sh)).
34 |
35 | 1. You need to modify the hyper-parameters in these bash scripts (./example/faster-rcnn/cityscapes/vgg/), for example train_meta_file, target_meta_file and val_meta_file.
36 | You also need to download the ImageNet-pretrained models, such as vgg16.
37 | 2. The detection hyper-parameters are placed in config_512.json, and you can customize them.
38 | 3. training:
39 | ```bash
40 | cd ./example/faster-rcnn/cityscapes/vgg/
41 | sh 4cluster.sh # for our type-2
42 |
43 | # Details (we follow the slurm training in MMDetection);
44 | # For the detailed descriptions of these hyperparameters, please refer to the ./tools/faster_rcnn_train_val.py
45 |
46 | #!/bin/bash
47 | ROOT=../../../..
48 | export PYTHONPATH=$ROOT:$PYTHONPATH
49 | #--------------------------
50 | job_name=training_4cluster
51 | ckdir=4cluster
52 | mkdir ./${ckdir}/${job_name}
53 | #--------------------------
54 | PARTITION=$1
55 | GPUS=${5:-8}
56 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
57 |
58 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
59 | --ntasks-per-node=${GPUS_PER_NODE} \
60 | --job-name=${job_name} \
61 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
62 | --config=config_512.json \
63 | --dist=1 \
64 | --fix_num=0 \
65 | --L1=1 \
66 | --cluster_num=4 \
67 | --threshold=128 \
68 | --recon_size=256 \
69 | --port=21603 \
70 | --arch=vgg16_FasterRCNN \
71 | --warmup_epochs=1 \
72 | --lr=0.0000125 \
73 | --step_epochs=16,22 \
74 | --batch-size=1 \
75 | --epochs=25 \
76 | --dataset=cityscapes \
77 | --train_meta_file=/path/to/train.txt \
78 | --target_meta_file=/path/to/foggy_train.txt \
79 | --val_meta_file=/path/to/foggy_val.txt \
80 | --datadir=/path/to/leftImg8bit/ \
81 | --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
82 | --results_dir=${ckdir}/${job_name}/results_dir \
83 | --save_dir=${ckdir}/${job_name} \
84 | 2>&1 | tee ${ckdir}/${job_name}/train.log
85 |
86 | ```
87 |
88 |
89 | ## Evaluation
90 | We provide our pre-trained model in this [url](https://drive.google.com/open?id=1FlQePvlavZVgmzBik6IdcG_xWh0xtROz). You can download it and make a test (please modify these parameters before evaluation).
91 |
92 | We support slurm evaluation and single-gpu evaluation. Please check the eval.sh and eval_single.sh
93 | ```bash
94 | sh eval_single.sh # evaluation with single gpu ==> mAP:33.91
95 | ```
96 |
97 | ## Citation
98 | If you find our work useful in your research, please consider citing our paper:
99 | ```
100 | @inproceedings{zhu2019adapting,
101 | title={Adapting Object Detectors via Selective Cross-Domain Alignment},
102 | author={Zhu, Xinge and Pang, Jiangmiao and Yang, Ceyuan and Shi, Jianping and Lin, Dahua},
103 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
104 | pages={687--696},
105 | year={2019}
106 | }
107 | ```
108 |
109 | ## Acknowledgments
110 | We thank the authors of the open-source codebases [mmdetection](https://github.com/open-mmlab/mmdetection) and [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch).
111 |
112 |
--------------------------------------------------------------------------------
/utils/bbox_helper.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 |
3 | import numpy as np
4 | import warnings
5 | from extensions._cython_bbox import cython_bbox
6 |
7 |
def bbox_iou_overlaps(b1, b2):
    '''
    :argument
        b1,b2: [n, k], k>=4, x1,y1,x2,y2,...
    :returns
        intersection-over-union pair-wise.

    Both inputs are cast to float32 and the computation is delegated to the
    compiled ``cython_bbox.bbox_overlaps`` extension. The NumPy implementation
    that used to follow the ``return`` was unreachable and has been removed.
    '''
    return cython_bbox.bbox_overlaps(b1.astype(np.float32), b2.astype(np.float32))
28 |
def bbox_iof_overlaps(b1, b2):
    '''
    Pair-wise intersection area divided by the area of the *first* box.

    :argument
        b1,b2: [n, k], k>=4 with x1,y1,x2,y2,....
    :returns
        [len(b1), len(b2)] matrix of intersection-over-former-box values; the
        denominator is clamped to a minimum of 1.
    '''
    former_area = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])

    # b1 varies along rows, b2 along columns.
    left = np.maximum(b1[:, 0].reshape(-1, 1), b2[:, 0].reshape(1, -1))
    top = np.maximum(b1[:, 1].reshape(-1, 1), b2[:, 1].reshape(1, -1))
    right = np.minimum(b1[:, 2].reshape(-1, 1), b2[:, 2].reshape(1, -1))
    bottom = np.minimum(b1[:, 3].reshape(-1, 1), b2[:, 3].reshape(1, -1))

    # Disjoint boxes give negative extents, which are clamped to zero.
    x_extent = np.maximum(right - left, 0)
    y_extent = np.maximum(bottom - top, 0)
    overlap = x_extent * y_extent
    return overlap / np.maximum(former_area[:, np.newaxis], 1)
46 |
def center_to_corner(boxes):
    '''
    Convert center-format boxes to corner format.

    :argument
        boxes: [N, 4] of center_x, center_y, w, h
    :returns
        boxes: [N, 4] of xmin, ymin, xmax, ymax
    '''
    cx, cy = boxes[:, 0], boxes[:, 1]
    half_w = boxes[:, 2] / 2.
    half_h = boxes[:, 3] / 2.
    return np.vstack([cx - half_w, cy - half_h, cx + half_w, cy + half_h]).transpose()
59 |
def corner_to_center(boxes):
    '''
    Convert corner-format boxes (xmin, ymin, xmax, ymax) to
    center_x, center_y, w, h; the inverse of center_to_corner.
    '''
    xmin, ymin = boxes[:, 0], boxes[:, 1]
    xmax, ymax = boxes[:, 2], boxes[:, 3]
    center_x = (xmin + xmax) / 2.
    center_y = (ymin + ymax) / 2.
    return np.vstack([center_x, center_y, xmax - xmin, ymax - ymin]).transpose()
69 |
def compute_loc_targets(raw_bboxes, gt_bboxes):
    '''
    Encode ground-truth boxes as regression targets relative to raw boxes.

    :argument
        raw_bboxes, gt_bboxes:[N, k] first dim must be equal
    :returns
        loc_targets:[N, 4], center offsets normalised by the raw box size
        followed by log size ratios
    '''
    src = corner_to_center(raw_bboxes)  # cx, cy, w, h
    dst = corner_to_center(gt_bboxes)
    src_w, src_h = src[:, 2], src[:, 3]
    # Raw boxes must have positive size: they are used as divisors below.
    assert (np.all(src_w > 0))
    assert (np.all(src_h > 0))
    dx = (dst[:, 0] - src[:, 0]) / src_w
    dy = (dst[:, 1] - src[:, 1]) / src_h
    dw = np.log(dst[:, 2] / src_w)
    dh = np.log(dst[:, 3] / src_h)
    return np.vstack([dx, dy, dw, dh]).transpose()
86 |
87 |
def compute_loc_bboxes(raw_bboxes, deltas):
    '''
    Apply regression deltas to raw boxes; the inverse of compute_loc_targets.

    :argument
        raw_bboxes, delta:[N, k] first dim must be equal
    :returns
        bboxes:[N, 4]
    '''
    # Warnings raised while decoding are captured (recorded) instead of being
    # shown to the user.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        ref = corner_to_center(raw_bboxes)  # cx, cy, w, h
        ref_w, ref_h = ref[:, 2], ref[:, 3]
        new_cx = deltas[:, 0] * ref_w + ref[:, 0]
        new_cy = deltas[:, 1] * ref_h + ref[:, 1]
        new_w = np.exp(deltas[:, 2]) * ref_w
        new_h = np.exp(deltas[:, 3]) * ref_h
        decoded = np.vstack([new_cx, new_cy, new_w, new_h]).transpose()
        return center_to_corner(decoded)
104 |
def clip_bbox(bbox, img_size):
    '''
    Clip box corners to the image, in place.

    :argument
        bbox: [N, >=4]; columns 0..3 are read as xmin, ymin, xmax, ymax
        img_size: sequence whose first two entries are (height, width)
    :returns
        bbox: the same array object, with x in [0, w-1] and y in [0, h-1]
    '''
    h, w = img_size[:2]
    # x columns are bounded by the width, y columns by the height.
    for col, upper in ((0, w - 1), (1, h - 1), (2, w - 1), (3, h - 1)):
        bbox[:, col] = np.clip(bbox[:, col], 0, upper)
    return bbox
112 |
def compute_recall(box_pred, box_gt):
    '''
    Count ground-truth boxes that are matched by at least one prediction.

    :argument
        box_pred: predicted boxes, passed to bbox_iou_overlaps
        box_gt: ground-truth boxes, first dim is the number of gts
    :returns
        (n_rc, n_gt): number of gts whose best overlap exceeds 0.5, and the
        total number of gts
    '''
    n_gt = box_gt.shape[0]
    # Nothing can be recalled without predictions or without ground truth.
    if box_pred.size == 0 or n_gt == 0:
        return 0, n_gt
    overlaps = bbox_iou_overlaps(box_gt, box_pred)
    best_per_gt = np.max(overlaps, axis=1)
    n_rc = np.where(best_per_gt > 0.5)[0].size
    return n_rc, n_gt
122 |
123 |
--------------------------------------------------------------------------------
/datasets/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | #import pycocotools._mask as _mask
4 | from . import _mask
5 |
6 | # Interface for manipulating masks stored in RLE format.
7 | #
8 | # RLE is a simple yet efficient format for storing binary masks. RLE
9 | # first divides a vector (or vectorized image) into a series of piecewise
10 | # constant regions and then for each piece simply stores the length of
11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
13 | # (note that the odd counts are always the numbers of zeros). Instead of
14 | # storing the counts directly, additional compression is achieved with a
15 | # variable bitrate representation based on a common scheme called LEB128.
16 | #
17 | # Compression is greatest given large piecewise constant regions.
18 | # Specifically, the size of the RLE is proportional to the number of
19 | # *boundaries* in M (or for an image the number of boundaries in the y
20 | # direction). Assuming fairly simple shapes, the RLE representation is
21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
22 | # is substantially lower, especially for large simple objects (large n).
23 | #
24 | # Many common operations on masks can be computed directly using the RLE
25 | # (without need for decoding). This includes computations such as area,
26 | # union, intersection, etc. All of these operations are linear in the
27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
28 | # of the object. Computing these operations on the original mask is O(n).
29 | # Thus, using the RLE can result in substantial computational savings.
30 | #
31 | # The following API functions are defined:
32 | # encode - Encode binary masks using RLE.
33 | # decode - Decode binary masks encoded via RLE.
34 | # merge - Compute union or intersection of encoded masks.
35 | # iou - Compute intersection over union between masks.
36 | # area - Compute area of encoded masks.
37 | # toBbox - Get bounding boxes surrounding encoded masks.
38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
39 | #
40 | # Usage:
41 | # Rs = encode( masks )
42 | # masks = decode( Rs )
43 | # R = merge( Rs, intersect=false )
44 | # o = iou( dt, gt, iscrowd )
45 | # a = area( Rs )
46 | # bbs = toBbox( Rs )
47 | # Rs = frPyObjects( [pyObjects], h, w )
48 | #
49 | # In the API the following formats are used:
50 | # Rs - [dict] Run-length encoding of binary masks
51 | # R - dict Run-length encoding of binary mask
52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
54 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
56 | # dt,gt - May be either bounding boxes or encoded masks
57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
58 | #
59 | # Finally, a note about the intersection over union (iou) computation.
60 | # The standard iou of a ground truth (gt) and detected (dt) object is
61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
62 | # For "crowd" regions, we use a modified criteria. If a gt object is
63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
64 | # Choosing gt' in the crowd gt that best matches the dt can be done using
65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
67 | # For crowd gt regions we use this modified criteria above for the iou.
68 | #
69 | # To compile run "python setup.py build_ext --inplace"
70 | # Please do not contact us for help with compiling.
71 | #
72 | # Microsoft COCO Toolbox. version 2.0
73 | # Data, paper, and tutorials available at: http://mscoco.org/
74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
75 | # Licensed under the Simplified BSD License [see coco/license.txt]
76 |
# Re-export these `_mask` functions under the module's public names without
# any wrapping (see the API summary in the header comment above).
iou = _mask.iou
merge = _mask.merge
frPyObjects = _mask.frPyObjects
80 |
def encode(bimask):
    # Encode binary mask(s) using RLE.
    # A 3-D [h, w, n] input is passed straight to _mask.encode; a 2-D [h, w]
    # input is reshaped to [h, w, 1] (column-major) and the single result is
    # unwrapped. Any other rank falls through and returns None.
    rank = len(bimask.shape)
    if rank == 3:
        return _mask.encode(bimask)
    if rank == 2:
        rows, cols = bimask.shape
        stacked = bimask.reshape((rows, cols, 1), order='F')
        return _mask.encode(stacked)[0]
87 |
def decode(rleObjs):
    # Decode RLE object(s) into binary mask(s).
    # A list is decoded as a batch; a single object is wrapped in a list and
    # its only mask (last-axis index 0) is returned.
    if type(rleObjs) is list:
        return _mask.decode(rleObjs)
    return _mask.decode([rleObjs])[:, :, 0]
93 |
def area(rleObjs):
    # Compute the area of encoded mask(s).
    # A list is processed as a batch; a single object is wrapped in a list
    # and the first (only) result is returned.
    if type(rleObjs) is list:
        return _mask.area(rleObjs)
    return _mask.area([rleObjs])[0]
99 |
def toBbox(rleObjs):
    # Get bounding box(es) surrounding encoded mask(s).
    # A list is processed as a batch; a single object is wrapped in a list
    # and the first (only) result is returned.
    if type(rleObjs) is list:
        return _mask.toBbox(rleObjs)
    return _mask.toBbox([rleObjs])[0]
105 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_sigmoid_kernel.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "focal_loss_sigmoid_kernel.h"
5 |
// Ceiling division: number of size-n chunks needed to cover m items.
// The quotient must divide by n (the original divided m by itself, which
// always produced 1 or 2). Arguments are evaluated more than once, so pass
// side-effect-free expressions.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Loop header for a 1-D kernel: each thread starts at its global index and
// advances by the total number of launched threads until it reaches n.
// The bound is parenthesized so compound expressions can be passed safely.
#define CUDA_1D_KERNEL_LOOP(i, n)                                \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);  \
       i += blockDim.x * gridDim.x)
11 |
// Forward pass of the sigmoid focal loss: writes one loss value per logit.
//   N           total number of logits (rows * num_classes)
//   logits      flat array of N raw scores
//   targets     one integer label per row of num_classes logits; -1 is ignored
//   weight_pos  normalizer; values below 1 are treated as 1
//   gamma,alpha focal-loss focusing and balancing parameters
//   losses      flat output array of N per-element losses
__global__ void SigmoidFocalLossKernel(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses) {
  CUDA_1D_KERNEL_LOOP(i, N) {
    const int cls = i % num_classes;              // class column of this element
    const int label = targets[i / num_classes];   // label of the row it belongs to
    const float x = logits[i];

    // Labels are 1-based while class columns are 0-based, so column `cls`
    // is the positive one when label == cls + 1. Rows labelled -1 contribute
    // to neither term.
    const float is_pos = (label == (cls + 1));
    const float is_neg = ((label != -1) && (label != (cls + 1)));

    float Np = max(weight_pos, 1.0);
    float zn = (1.0 - alpha) / Np;
    float zp = alpha / Np;

    // p = 1. / (1. + expf(-x))
    float p = 1. / (1. + expf(-x));

    // (1 - p)**gamma * log(p); p is clamped so the log stays finite
    float pos_term = powf((1. - p), gamma) * logf(max(p, FLT_MIN));
    // p**gamma * log(1 - p), with log(1 - p) written in a form that avoids
    // exponentiating a large positive logit
    float neg_term =
        powf(p, gamma) *
        (-1. * x * (x >= 0) -
         logf(1. + expf(x - 2. * x * (x >= 0))));

    // Accumulate locally and store once.
    float loss = 0.0;
    loss += -is_pos * pos_term * zp;
    loss += -is_neg * neg_term * zn;
    losses[i] = loss;
  }
}
48 |
// Backward pass of the sigmoid focal loss: writes d(loss)/d(logit) for every
// logit into dX_data. Arguments mirror SigmoidFocalLossKernel, with dX_data
// as the flat output array of N gradients.
__global__ void SigmoidFocalLossGradientKernel(
    const int N, const float* logits,
    const int* targets, float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(i, N) {
    const int cls = i % num_classes;              // class column of this element
    const int label = targets[i / num_classes];   // label of the row it belongs to
    const float x = logits[i];

    float Np = max(weight_pos, 1.0);
    float zn = (1.0 - alpha) / Np;
    float zp = alpha / Np;

    // Labels are 1-based, class columns 0-based; rows labelled -1 are ignored.
    const float is_pos = (label == (cls + 1));
    const float is_neg = ((label != -1) && (label != (cls + 1)));
    float p = 1. / (1. + expf(-x));

    // (1-p)**g * (1 - p - g*p*log(p))
    float pos_term =
        powf((1. - p), gamma) *
        (1. - p - (p * gamma * logf(max(p, FLT_MIN))));
    // (p**g) * (g*(1-p)*log(1-p) - p)
    float neg_term =
        powf(p, gamma) *
        ((-1. * x * (x >= 0) -
          logf(1. + expf(x - 2. * x * (x >= 0)))) *
         (1. - p) * gamma - p);

    // Accumulate locally and store once.
    float grad = 0.0;
    grad += -is_pos * zp * pos_term;
    grad += -is_neg * zn * neg_term;
    dX_data[i] = grad;
  }
}
82 |
// Host-side launcher for SigmoidFocalLossKernel on `stream`.
// Checks for a pending CUDA error before the launch and for a launch error
// afterwards; either one prints a message to stderr and terminates the
// process. Returns 1 otherwise.
int SigmoidFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses, cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  // Enough blocks to give every one of the N elements its own thread.
  const int num_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;

  cudaError_t status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  SigmoidFocalLossKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, targets, weight_pos, gamma, alpha, num_classes, losses);

  status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  return 1;
}
112 |
113 |
// Host-side launcher for SigmoidFocalLossGradientKernel on `stream`.
// A launch error prints a message to stderr and terminates the process.
// Returns 1 otherwise.
int SigmoidFocalLossBackwardLaucher(
    const int N, const float* logits, const int* targets,
    float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  // Enough blocks to give every one of the N elements its own thread.
  const int num_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;

  SigmoidFocalLossGradientKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, targets, dX_data, weight_pos, gamma, alpha, num_classes);

  const cudaError_t status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  return 1;
}
136 |
137 |
138 |
--------------------------------------------------------------------------------
/functions/anchor_target.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 | # from utils.debug_helper import debugger
3 | from utils import bbox_helper
4 | from utils import anchor_helper
5 | import numpy as np
6 | import torch
7 | import logging
8 | logger = logging.getLogger('global')
9 |
def to_np_array(x):
    '''
    Convert the input to a numpy array.

    None is passed through, tensors are moved to the CPU and converted,
    anything else goes through np.array.
    '''
    if x is None:
        return None
    if torch.is_tensor(x):
        # .data drops autograd tracking so .numpy() is allowed
        return x.cpu().data.numpy()
    return np.array(x)
15 |
def compute_anchor_targets(feature_size, cfg, ground_truth_bboxes, image_info, ignore_regions = None):
    r'''
    Build RPN training targets: label every anchor as positive (1),
    negative (0) or ignored (-1), sample a fixed-size subset, and compute
    box-regression targets for the positives.

    :argument
        cfg.keys(): {
            'anchor_ratios', anchor_scales, anchor_stride,
            negative_iou_thresh, ignore_iou_thresh,positive_iou_thresh,
            positive_percent, rpn_batch_size
        }
        feature_size: IntTensor, [4]. i.e. batch, num_anchors * 4, height, width
        ground_truth_bboxes: FloatTensor, [batch, max_num_gt_bboxes, 5]
        image_info: FloatTensor, [batch, 3]
        ignore_regions: FloatTensor, [batch, max_num_ignore_regions, 4]
    :returns
        cls_targets: LongTensor, [batch, num_anchors * 1, height, width]
        loc_targets, loc_masks: FloatTensor, [batch, num_anchors * 4, height, width]
        loc_nomalizer: int, number of sampled anchors (labels >= 0), at least 1
    '''
    # NOTE(review): cuda_device is captured here but never used; the outputs
    # below are moved with a bare .cuda() instead.
    cuda_device = ground_truth_bboxes.device
    ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [ground_truth_bboxes, image_info, ignore_regions])

    batch_size, num_anchors_4, featmap_h, featmap_w = feature_size
    num_anchors = num_anchors_4 // 4
    assert(num_anchors * 4 == num_anchors_4)
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(
        featmap_h, featmap_w, cfg['anchor_ratios'], cfg['anchor_scales'],
        cfg['anchor_stride'])

    B = batch_size
    A = num_anchors
    K = featmap_h * featmap_w
    # NOTE(review): G is not used below.
    G = ground_truth_bboxes.shape[1]

    # compute overlaps between anchors and gt_bboxes within each batch
    # shape: [B, K*A, G]
    overlaps = np.stack([bbox_helper.bbox_iou_overlaps(anchors_overplane,
        ground_truth_bboxes[ix]) for ix in range(B)], axis = 0)

    # shape of [B, K*A]: best gt index and best overlap for every anchor
    argmax_overlaps = overlaps.argmax(axis = 2)
    max_overlaps = overlaps.max(axis = 2)

    # [B, G]: best overlap achieved by any anchor for every gt
    gt_max_overlaps = overlaps.max(axis=1)
    # ignore those gts whose best overlap is too small: -1 never equals a
    # computed overlap, so such gts match no anchor in the comparison below
    # (presumably this also filters zero-padded gt rows -- TODO confirm)
    gt_max_overlaps[gt_max_overlaps < 0.1] = -1
    gt_argmax_b_ix, gt_argmax_ka_ix, gt_argmax_g_ix = \
        np.where(overlaps == gt_max_overlaps[:, np.newaxis, :])
    # match each anchor to the ground truth bbox: anchors that are the best
    # match of some gt are re-assigned to that gt
    argmax_overlaps[gt_argmax_b_ix, gt_argmax_ka_ix] = gt_argmax_g_ix

    # start with every anchor ignored (-1), then mark low-overlap ones negative
    labels = np.empty([B, K*A], dtype=np.int64)
    labels.fill(-1)
    labels[max_overlaps < cfg['negative_iou_thresh']] = 0

    # remove negatives located in ignore regions
    if ignore_regions is not None:
        iof_overlaps = np.stack([bbox_helper.bbox_iof_overlaps
            (anchors_overplane, ignore_regions[ix]) for ix in range(B)], axis=0)
        max_iof_overlaps = iof_overlaps.max(axis=2) # [B, K*A]
        labels[max_iof_overlaps > cfg['ignore_iou_thresh']] = -1

    # positives are assigned last so they override the negative/ignore marks
    labels[gt_argmax_b_ix, gt_argmax_ka_ix] = 1
    labels[max_overlaps > cfg['positive_iou_thresh']] = 1

    # sampling: the positive/negative budgets are computed over the whole
    # batch, not per image; surplus anchors are set to ignore (-1)
    num_pos_sampling = int(cfg['positive_percent'] * cfg['rpn_batch_size'] * batch_size)
    pos_b_ix, pos_ka_ix = np.where(labels > 0)
    num_positives = len(pos_b_ix)
    if num_positives > num_pos_sampling:
        remove_ix = np.random.choice(num_positives, size = num_positives - num_pos_sampling, replace = False)
        labels[pos_b_ix[remove_ix], pos_ka_ix[remove_ix]] = -1
        num_positives = num_pos_sampling
    # negatives fill whatever part of the budget the positives left over
    num_neg_sampling = cfg['rpn_batch_size'] * batch_size - num_positives
    neg_b_ix, neg_ka_ix = np.where(labels == 0)
    num_negatives = len(neg_b_ix)
    if num_negatives > num_neg_sampling:
        remove_ix = np.random.choice(num_negatives, size = num_negatives - num_neg_sampling, replace = False)
        labels[neg_b_ix[remove_ix], neg_ka_ix[remove_ix]] = -1

    # recompute the positive set after sampling
    pos_b_ix, pos_ka_ix = np.where(labels > 0)
    pos_anchors = anchors_overplane[pos_ka_ix, :]

    # regression targets from each positive anchor to its matched gt box
    pos_target_ix = argmax_overlaps[pos_b_ix, pos_ka_ix]
    pos_target_gt = ground_truth_bboxes[pos_b_ix, pos_target_ix]
    pos_loc_targets = bbox_helper.compute_loc_targets(pos_anchors, pos_target_gt)

    loc_targets = np.zeros([B, K*A, 4], dtype = np.float32)
    loc_targets[pos_b_ix, pos_ka_ix, :] = pos_loc_targets
    # loc_weights = np.zeros([B, K*A, 4])
    # mask is 1 on the four coordinates of every positive anchor, 0 elsewhere
    loc_masks = np.zeros([B, K*A, 4], dtype = np.float32)
    loc_masks[pos_b_ix, pos_ka_ix, :] = 1.

    # transpose to match the predicted convolution shape
    # NOTE(review): .cuda() targets the current default CUDA device, which is
    # not necessarily the device of the inputs -- confirm for multi-GPU use.

    cls_targets = torch.from_numpy(labels).long().view(B, featmap_h, featmap_w, A).permute(0, 3, 1, 2).cuda().contiguous()
    loc_targets = torch.from_numpy(loc_targets).float().view(B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2).cuda().contiguous()
    loc_masks = torch.from_numpy(loc_masks).float().view(B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2).cuda().contiguous()
    # number of sampled (non-ignored) anchors, floored at 1
    loc_nomalizer = max(1,len(np.where(labels >= 0)[0]))
    logger.debug('positive anchors:%d' % len(pos_b_ix))
    return cls_targets, loc_targets, loc_masks, loc_nomalizer
117 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_softmax_kernel.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "focal_loss_softmax_kernel.h"
5 |
// Ceiling division: number of size-n chunks needed to cover m items.
// The quotient must divide by n (the original divided m by itself, which
// always produced 1 or 2). Arguments are evaluated more than once, so pass
// side-effect-free expressions.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Loop header for a 1-D kernel: each thread starts at its global index and
// advances by the total number of launched threads until it reaches n.
// The bound is parenthesized so compound expressions such as
// `N / num_classes` can be passed safely.
#define CUDA_1D_KERNEL_LOOP(i, n)                                \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n);  \
       i += blockDim.x * gridDim.x)
11 |
// Row-wise softmax: treats Xdata as N / num_classes rows of num_classes
// logits and writes the corresponding probabilities to Pdata.
__global__ void SpatialSoftmaxKernel(const int N, const float* Xdata, float* Pdata,
                                     const int num_classes) {
  CUDA_1D_KERNEL_LOOP(index, N / num_classes) {
    const int base = index * num_classes;  // first element of this row
    const float* x_row = Xdata + base;
    float* p_row = Pdata + base;

    // Subtract the row maximum before exponentiating for numerical reasons.
    float row_max = -FLT_MAX;
    for (int c = 0; c < num_classes; ++c) {
      row_max = max(row_max, x_row[c]);
    }

    // Store the unnormalized exponentials and accumulate their sum.
    float denom = 0.0f;
    for (int c = 0; c < num_classes; ++c) {
      const float e = expf(x_row[c] - row_max);
      p_row[c] = e;
      denom += e;
    }

    // Normalize so the row sums to one.
    for (int c = 0; c < num_classes; ++c) {
      p_row[c] /= denom;
    }
  }
}
35 |
// Forward pass of the softmax focal loss: writes one loss value per row.
//   N           total number of probabilities (rows * num_classes)
//   Pdata       softmax probabilities, num_classes per row
//   targets     one integer label per row; negative labels are ignored
//   losses      output, one value per row
//   weight_pos  normalizer; values below 1 are treated as 1
//   gamma,alpha focal-loss focusing and balancing parameters
// NOTE(review): labels are not range-checked; a label >= num_classes would
// read past the end of its row.
__global__ void SoftmaxFocalLossKernel(
    const int N,
    const float* Pdata, const int* targets, float* losses,
    const float weight_pos, const float gamma, const float alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(i, N / num_classes) {

    int base = i * num_classes;
    // The cast needs its explicit target type to compile.
    const int label = static_cast<int>(targets[i]);

    float Np = max(weight_pos, 1.0);
    // Label 0 is weighted by (1 - alpha), labels >= 1 by alpha.
    float z = (label == 0) * (1 - alpha) / Np +
              (label >= 1) * alpha / Np;

    losses[i] = 0.0;
    if (label >= 0) {
      // -(1 - p)**gamma * log(p), with p clamped so the log stays finite
      losses[i] =
          -(powf(1.0 - Pdata[base + label], gamma) *
            log(max(Pdata[base + label], FLT_MIN))) * z;
    }
  }
}
58 |
// Computes, for every row, the scalar factor of the softmax focal-loss
// gradient that does not depend on the class column, and stores it in buff
// (one value per row). Rows with a negative label get 0.
// NOTE(review): labels are not range-checked against num_classes.
__global__ void SoftmaxFocalLossGradientWeightKernel(
    const int N,
    const float* Pdata, const int* targets, float* buff,
    const float weight_pos, const float gamma, const float alpha,
    const int num_classes) {
  CUDA_1D_KERNEL_LOOP(i, N / num_classes) {

    int base = i * num_classes;
    // The cast needs its explicit target type to compile.
    const int label = static_cast<int>(targets[i]);
    float Np = max(weight_pos, 1.0);
    // Label 0 is weighted by (1 - alpha), labels >= 1 by alpha.
    float z = (label == 0) * (1 - alpha) / Np +
              (label >= 1) * alpha / Np;

    buff[i] = 0.0;
    if (label >= 0) {
      float onemp = 1. - Pdata[base + label];
      float p = Pdata[base + label];
      // -(1-p)**g + g * (1-p)**(g-1) * p * log(p), p clamped inside the log
      buff[i] =
          (-powf(onemp, gamma) +
           gamma * powf(onemp, gamma - 1) * p * log(max(p, FLT_MIN))) * z;
    }
  }
}
82 |
83 |
// Expands the per-row factors in buff into the full gradient dX, one value
// per (row, class) element:
//   dX[i] = [label >= 0] * buff[row] * ([label == class] - Pdata[i])
__global__ void SoftmaxFocalLossGradientKernel(
    const int N,
    const float* Pdata, const int* targets, const float* buff,
    float* dX, const int num_classes) {
  CUDA_1D_KERNEL_LOOP(i, N) {

    int ind = i / num_classes;  // row this element belongs to
    int cls = i % num_classes;  // class column of this element

    // The cast needs its explicit target type to compile.
    const int label = static_cast<int>(targets[ind]);

    float c1 = (label >= 0) * 1.0;     // 0 for ignored rows
    float c2 = (label == cls) * 1.0;   // 1 on the labelled class column
    // Single store: the former `dX[i] = 0.0` was immediately overwritten.
    dX[i] = c1 * buff[ind] * (c2 - Pdata[i]);
  }
}
101 |
// Host-side launcher for the softmax focal-loss forward pass on `stream`:
// first fills `priors` with the row-wise softmax of `logits`, then computes
// the per-row losses from those probabilities.
// Checks for a pending CUDA error before the launches and for an error after
// both of them; either one prints a message to stderr and terminates the
// process. Returns 1 otherwise.
int SoftmaxFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses,
    float* priors, cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  // Both kernels use one thread per row of num_classes elements.
  const int num_row_blocks =
      (N / num_classes + kThreadsPerBlock - 1) / kThreadsPerBlock;

  cudaError_t status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  SpatialSoftmaxKernel<<<num_row_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, priors, num_classes);

  SoftmaxFocalLossKernel<<<num_row_blocks, kThreadsPerBlock, 0, stream>>>(
      N, priors, targets, losses, weight_pos, gamma, alpha, num_classes);

  status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  return 1;
}
137 |
138 |
// Host-side launcher for the softmax focal-loss backward pass on `stream`:
// first computes one gradient factor per row into `buff`, then expands it
// into the per-element gradient `dX_data` using the probabilities saved in
// `priors`. `logits` is accepted but not read here.
// An error after the launches prints a message to stderr and terminates the
// process. Returns 1 otherwise.
int SoftmaxFocalLossBackwardLaucher(
    const int N, const float* logits, const int* targets,
    float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    const float* priors, float* buff, cudaStream_t stream) {
  const int kThreadsPerBlock = 1024;
  // One thread per row for the weight kernel, one per element for the
  // gradient kernel.
  const int num_row_blocks =
      (N / num_classes + kThreadsPerBlock - 1) / kThreadsPerBlock;
  const int num_elem_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;

  SoftmaxFocalLossGradientWeightKernel<<<num_row_blocks, kThreadsPerBlock, 0, stream>>>(
      N, priors, targets, buff, weight_pos, gamma, alpha, num_classes);

  SoftmaxFocalLossGradientKernel<<<num_elem_blocks, kThreadsPerBlock, 0, stream>>>(
      N, priors, targets, buff, dX_data, num_classes);

  const cudaError_t status = cudaGetLastError();
  if (status != cudaSuccess) {
    fprintf(stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
            __LINE__, cudaGetErrorString(status));
    exit(-1);
  }

  return 1;
}
165 |
166 |
167 |
--------------------------------------------------------------------------------
/datasets/coco_loader.py:
--------------------------------------------------------------------------------
1 | #encoding: utf-8
2 |
3 | import torch
4 | import torch.utils.data
5 | import torch.nn.functional as F
6 | from torch.autograd import Variable
7 | import numpy as np
8 | import logging
9 | #logger = logging.getLogger('global')
10 |
def to_np_array(x):
    '''
    Convert the input to a numpy array.

    None is passed through, a Variable is unwrapped to its data, tensors are
    moved to the CPU and converted, anything else goes through np.array.
    '''
    if x is None:
        return None
    if isinstance(x, Variable):
        x = x.data
    if torch.is_tensor(x):
        return x.cpu().numpy()
    return np.array(x)
16 |
17 | class COCODataLoader(torch.utils.data.DataLoader):
18 | def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
19 | num_workers=0, pin_memory=False, drop_last=False):
20 | super(COCODataLoader, self).__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
21 | num_workers, self._collate_fn, pin_memory, drop_last)
22 | def _collate_fn(self, batch):
23 | '''
24 | Return: a mini-batch of data:
25 | image_data: FloatTensor of image, with shape of [b, 3, max_h, max_w]
26 | image_info: np.array of shape [b, 5], (resized_image_h, resized_image_w, resize_scale, origin_image_h, origin_image_w)
27 | bboxes: np.array of shape [b, max_num_gts, 5]
28 | keypoints: np.array of shape[b, max_num_gts, k, 2]
29 | masks: np.array of shape [b, max_num_gts, max_h, max_w]
30 | filename: list of str
31 | '''
32 | batch_size = len(batch)
33 |
34 | zip_batch = list(zip(*batch))
35 | images = zip_batch[0]
36 | unpad_image_sizes = zip_batch[1]
37 | ground_truth_bboxes = zip_batch[2]
38 | ignore_regions = zip_batch[3]
39 | ground_truth_keypoints = zip_batch[4]
40 | ground_truth_masks = zip_batch[5]
41 | filenames = zip_batch[6]
42 | has_keyp = ground_truth_keypoints[0] is not None
43 | has_mask = ground_truth_masks[0] is not None
44 |
45 |
46 | max_img_h = max([_.shape[-2] for _ in images])
47 | max_img_w = max([_.shape[-1] for _ in images])
48 |
49 | max_img_h = int(np.ceil(max_img_h / 128.0) * 128)
50 | max_img_w = int(np.ceil(max_img_w / 128.0) * 128)
51 |
52 | max_num_gt_bboxes = max([_.shape[0] for _ in ground_truth_bboxes])
53 | max_num_ig_bboxes = max([_.shape[0] for _ in ignore_regions])
54 | assert(max_num_gt_bboxes > 0)
55 | assert(max_num_ig_bboxes > 0)
56 |
57 | padded_images = []
58 | padded_gt_bboxes = []
59 | padded_ig_bboxes = []
60 | padded_gt_keypoints = [] if has_keyp else None
61 | padded_gt_masks = [] if has_mask else None
62 | for b_ix in range(batch_size):
63 | img = images[b_ix]
64 |
65 | # pad zeros to right bottom of each image
66 | pad_size = (0, max_img_w - img.shape[-1], 0, max_img_h - img.shape[-2])
67 | padded_images.append(F.pad(img, pad_size, 'constant', 0).data.cpu())
68 |
69 | # pad zeros to gt_bboxes
70 | gt_bboxes = to_np_array(ground_truth_bboxes[b_ix])
71 | new_gt_bboxes = np.zeros([max_num_gt_bboxes, gt_bboxes.shape[-1]])
72 | new_gt_bboxes[range(gt_bboxes.shape[0]), :] = gt_bboxes
73 | padded_gt_bboxes.append(new_gt_bboxes)
74 |
75 | # pad zeros to ig_bboxes
76 | ig_bboxes = to_np_array(ignore_regions[b_ix])
77 | new_ig_bboxes = np.zeros([max_num_ig_bboxes, ig_bboxes.shape[-1]])
78 | new_ig_bboxes[range(ig_bboxes.shape[0]), :] = ig_bboxes
79 | padded_ig_bboxes.append(new_ig_bboxes)
80 |
81 | # pad zero to keypoints
82 | if has_keyp:
83 | keypoints = to_np_array(ground_truth_keypoints[b_ix])
84 | shape = keypoints.shape
85 | new_keypoints = np.zeros([max_num_gt_bboxes, shape[1], shape[2]])
86 | new_keypoints[range(keypoints.shape[0]), ...] = keypoints
87 | padded_gt_keypoints.append(new_keypoints)
88 |
89 | # pad zeros to masks
90 | if has_mask:
91 | # [n, img_h, img_w] -> [n, max_img_h, max_img_w]
92 | masks = torch.from_numpy(ground_truth_masks[b_ix])
93 | masks = F.pad(Variable(masks), pad_size, 'constant', 0).data.cpu()
94 | # [n, max_img_h, max_img_w] -> [max_num_gt_bboxes, max_img_h, max_img_w]
95 | if masks.shape[0] < max_num_gt_bboxes:
96 | pad_masks = masks.new(max_num_gt_bboxes - masks.shape[0], max_img_h, max_img_w).zero_()
97 | masks = torch.cat([masks, pad_masks], dim=0)
98 | padded_gt_masks.append(masks.numpy())
99 |
100 | padded_images = torch.cat(padded_images, dim = 0)
101 | unpad_image_sizes = np.stack(unpad_image_sizes, axis = 0)
102 | stack_fn = lambda x : np.stack(x, axis=0) if x else np.array([])
103 | padded_gt_bboxes = stack_fn(padded_gt_bboxes)
104 | padded_ig_bboxes = stack_fn(padded_ig_bboxes)
105 | padded_gt_keypoints = stack_fn(padded_gt_keypoints)
106 | padded_gt_masks = stack_fn(padded_gt_masks)
107 |
108 | #logger.debug('image.shape:{}'.format(padded_images.shape))
109 | #logger.debug('gt_box.shape:{}'.format(padded_gt_bboxes.shape))
110 | #logger.debug('image_info.shape:{}'.format(unpad_image_sizes.shape))
111 | #logger.debug('gt_kpts.shape:{}'.format(padded_gt_keypoints.shape))
112 | #logger.debug('gt_mask.shape:{}'.format(padded_gt_masks.shape))
113 | return [padded_images,
114 | unpad_image_sizes,
115 | padded_gt_bboxes,
116 | padded_ig_bboxes,
117 | padded_gt_keypoints,
118 | padded_gt_masks,
119 | filenames]
120 |
121 |
def validate(anno_file):
    '''
    Print how many distinct images appear in the annotations of `anno_file`,
    and how many of them have at least one annotation with keypoints.
    '''
    from pycocotools.coco import COCO
    coco = COCO(anno_file)
    all_image_ids = set()
    keypoint_image_ids = set()
    for anno in coco.anns.values():
        image_id = anno['image_id']
        all_image_ids.add(image_id)
        if anno['num_keypoints'] > 0:
            keypoint_image_ids.add(image_id)
    print('total images of person :{}\n'.format(len(all_image_ids)))
    print('images with annotated keypoints:{}\n'.format(len(keypoint_image_ids)))
133 |
134 |
--------------------------------------------------------------------------------
/extensions/_focal_loss/focal_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Function
3 | from ._ext import focal_loss
4 | import time
5 | import logging
6 |
class SigmoidFocalLossFunction(Function):
    '''
    Autograd function wrapping the CUDA sigmoid focal-loss kernels.

    forward returns the summed loss as a one-element CUDA FloatTensor;
    backward returns the gradient with respect to preds only.
    '''

    def __init__(self, gamma, alpha, num_classes):
        self.gamma, self.alpha, self.num_classes = gamma, alpha, num_classes

        # Filled in by forward and consumed by backward.
        self.weight_pos = None
        self.preds = None
        self.targets = None

    def forward(self, preds, targets, weight_pos):
        # preds shape: [Batch * h * w * num_anchors, num_classes]
        # targets shape: [Batch * h * w * num_anchors]
        num_rows, num_cols = preds.size(0), preds.size(1)

        assert(num_rows == targets.size(0))
        assert(num_cols == self.num_classes)

        # One loss value per logit, filled in by the kernel.
        losses = preds.new(num_rows, num_cols).zero_()
        weight_pos = float(weight_pos[0])
        num_elements = num_rows * num_cols

        # The extension reads the raw buffers, so they must be dense and on GPU.
        for buf in (losses, preds, targets):
            assert(buf.is_contiguous())
        assert(preds.is_cuda and targets.is_cuda)

        focal_loss.focal_loss_sigmoid_forward_cuda(
            num_elements, preds, targets, weight_pos,
            self.gamma, self.alpha, self.num_classes, losses)

        # Keep the inputs around for backward.
        self.preds, self.targets, self.weight_pos = preds, targets, weight_pos
        return torch.cuda.FloatTensor([losses.sum()])

    def backward(self, grad_output):
        # grad_output: 1.0 / num_of_gpus
        num_rows, num_cols = self.preds.size(0), self.preds.size(1)
        grad_input = self.preds.new(num_rows, num_cols).zero_()
        num_elements = num_rows * num_cols

        for buf in (self.preds, self.targets, grad_input):
            assert(buf.is_contiguous())
        assert(self.preds.is_cuda and self.targets.is_cuda and grad_input.is_cuda)

        focal_loss.focal_loss_sigmoid_backward_cuda(
            num_elements, self.preds, self.targets, grad_input,
            self.weight_pos, self.gamma, self.alpha, self.num_classes)

        # Scale by the incoming gradient; targets and weight_pos get none.
        grad_input = grad_input * grad_output
        return grad_input, None, None
69 |
class SoftmaxFocalLossFunction(Function):
    '''
    Autograd function wrapping the CUDA softmax focal-loss kernels.

    forward returns the summed loss as a one-element CUDA FloatTensor and
    keeps the softmax probabilities for backward; backward returns the
    gradient with respect to preds only.
    '''

    def __init__(self, gamma, alpha, num_classes):
        self.gamma, self.alpha, self.num_classes = gamma, alpha, num_classes

        # Filled in by forward and consumed by backward.
        self.weight_pos = None
        self.preds = None
        self.targets = None

    def forward(self, preds, targets, weight_pos):
        # preds shape: [Batch * h * w * num_anchors, num_classes]
        # targets shape: [Batch * h * w * num_anchors]
        num_rows, num_cols = preds.size(0), preds.size(1)

        assert(num_rows == targets.size(0))
        assert(num_cols == self.num_classes)

        # One loss per row; priors receives the per-class softmax output.
        losses = preds.new(num_rows).zero_()
        priors = preds.new(num_rows, num_cols).zero_()

        weight_pos = float(weight_pos[0])
        num_elements = num_rows * num_cols

        # The extension reads the raw buffers, so they must be dense and on GPU.
        for buf in (losses, preds, targets, priors):
            assert(buf.is_contiguous())
        assert(preds.is_cuda and targets.is_cuda)

        focal_loss.focal_loss_softmax_forward_cuda(
            num_elements, preds, targets, weight_pos,
            self.gamma, self.alpha, self.num_classes, losses, priors)

        # Keep the inputs and probabilities around for backward.
        self.preds, self.targets, self.weight_pos = preds, targets, weight_pos
        self.priors = priors
        return torch.cuda.FloatTensor([losses.sum()])

    def backward(self, grad_output):
        # grad_output: 1.0 / num_of_gpus
        num_rows, num_cols = self.preds.size(0), self.preds.size(1)
        grad_input = self.preds.new(num_rows, num_cols).zero_()
        # Scratch buffer with one entry per row, used by the kernels.
        buff = self.preds.new(num_rows).zero_()
        num_elements = num_rows * num_cols

        for buf in (self.preds, self.targets, grad_input, buff):
            assert(buf.is_contiguous())
        assert(self.preds.is_cuda and self.targets.is_cuda and grad_input.is_cuda and buff.is_cuda)

        focal_loss.focal_loss_softmax_backward_cuda(
            num_elements, self.preds, self.targets, grad_input,
            self.weight_pos, self.gamma, self.alpha, self.num_classes,
            self.priors, buff)

        # Scale by the incoming gradient; targets and weight_pos get none.
        grad_input = grad_input * grad_output
        return grad_input, None, None
143 |
--------------------------------------------------------------------------------
/datasets/example_dataset.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import torch
3 | from torch.utils.data import DataLoader, Dataset
4 | import torchvision.transforms as transforms
5 | import numpy as np
6 | from io import StringIO
7 | from PIL import Image
8 | import pickle as pk
9 | import os
10 | import logging
11 |
def pil_loader(img_str):
    """Decode an in-memory encoded image into an RGB PIL image.

    :param img_str: the encoded image content (bytes of e.g. a PNG/JPEG file)
    :return: a PIL image converted to mode ``'RGB'``
    """
    # Function-scope import: the module header only imports StringIO.
    from io import BytesIO

    # Encoded images are binary data.  A text buffer (StringIO) rejects bytes
    # on Python 3 and is not a binary stream for Image.open, so wrap the
    # payload in a BytesIO positioned at offset 0 instead.
    with Image.open(BytesIO(img_str)) as img:
        return img.convert('RGB')
19 |
class ExampleDataset(Dataset):
    """Detection dataset backed by a plain-text annotation list.

    Each record of ``list_file`` is read with these offsets from its first
    line: ``+1`` image path (relative to ``root_dir``), ``+3`` image height,
    ``+4`` image width, ``+6`` number of ignore regions, followed by that
    many lines of four box coordinates, then one line with the number of
    ground-truth boxes followed by that many ``label c0 c1 c2 c3`` lines.

    Parsed records are cached with pickle in the current working directory
    under ``meta_<list_file stem>`` and reused on the next construction.
    Each entry of ``self.metas`` is
    ``[img_name, height, width, gt_boxes, labels, ignore_boxes]``.
    """

    def __init__(self, root_dir, list_file, transform_fn, normalize_fn=None, memcached=False):
        """
        :param root_dir: directory that image paths are joined onto
        :param list_file: annotation list (see the class docstring)
        :param transform_fn: callable ``(img, bbox, ignores)`` returning
            ``(img, bbox, resize_scale, ignores)``
        :param normalize_fn: optional callable applied to the image tensor
        :param memcached: accepted for interface compatibility; unused
        """
        self.root_dir = root_dir
        self.transform_fn = transform_fn
        self.normalize_fn = normalize_fn

        save_name = 'meta_%s' % (list_file.split('.')[0].strip('/').replace('/', '_'))
        if os.path.exists(save_name):
            # NOTE(security): pickle.load can execute arbitrary code; the
            # cache file must only ever come from a trusted run of this class.
            with open(save_name, 'rb') as f:
                self.metas = pk.load(f)
        else:
            self.metas = self._parse_list_file(list_file)
            with open(save_name, 'wb') as f:
                pk.dump(self.metas, f)
        self.num = len(self.metas)
        # aspect ratio (height / width) of images, used for sampler sorting
        self.aspect_ratios = [float(m[1]) / m[2] for m in self.metas]

    @staticmethod
    def _parse_list_file(list_file):
        """Parse ``list_file`` into the list of per-image meta records."""
        with open(list_file) as f:
            lines = f.readlines()
        metas = []
        i = 0
        while i < len(lines):
            if not lines[i].strip():
                # Tolerate blank separator / trailing lines between records.
                i += 1
                continue
            img_name = lines[i + 1].rstrip()
            img_height = float(lines[i + 3])
            img_width = float(lines[i + 4])
            img_ig_size = int(lines[i + 6])
            i += 7
            img_ig = []
            for j in range(img_ig_size):
                sp = lines[i + j].split()
                img_ig.append([float(sp[0]), float(sp[1]), float(sp[2]), float(sp[3])])
            if not img_ig:
                # Keep at least one (all-zero) ignore box per image.
                img_ig.append([0, 0, 0, 0])
            i += img_ig_size
            img_gt_size = int(lines[i])
            i += 1
            img_gt = []
            labels = []
            for j in range(img_gt_size):
                sp = lines[i + j].split()
                img_gt.append([float(sp[1]), float(sp[2]), float(sp[3]), float(sp[4])])
                labels.append(int(sp[0]))
            i += img_gt_size
            # reshape keeps the (N, 4) layout even when the image has no
            # ground-truth box; np.array([]) alone would be 1-D and break the
            # column indexing done by the transform and by __getitem__.
            gt_array = np.array(img_gt, dtype=np.float64).reshape(-1, 4)
            metas.append([img_name, img_height, img_width, gt_array,
                          np.array(labels), np.array(img_ig)])
        return metas

    def __len__(self):
        return self.num

    def __getitem__(self, idx):
        """Load image ``idx`` and return
        ``[image (1, C, H, W), (new_h, new_w, resize_scale), boxes with label
        as last column, ignore boxes, filename]``.
        """
        filename = os.path.join(self.root_dir, self.metas[idx][0])
        h, w, bbox, labels, ignores = self.metas[idx][1:]
        # reshape also repairs 1-D empty arrays coming from older cache files
        bbox = bbox.astype(np.float32).reshape(-1, 4)
        ignores = ignores.astype(np.float32)
        labels = labels.astype(np.float32)
        img = Image.open(filename)
        if img.mode == 'L':
            img = img.convert('RGB')
        assert(img.size[0] == w and img.size[1] == h)
        ## det transform
        img, bbox, resize_scale, ignores = self.transform_fn(img, bbox, ignores)
        new_w, new_h = img.size
        ## to tensor
        to_tensor = transforms.ToTensor()
        img = to_tensor(img)
        if self.normalize_fn is not None:
            img = self.normalize_fn(img)
        bbox = np.hstack([bbox, labels[:, np.newaxis]])
        return [img.unsqueeze(0),
                torch.Tensor([new_h, new_w, resize_scale]),
                torch.from_numpy(bbox),
                torch.from_numpy(ignores),
                filename]
104 |
105 |
class ExampleTransform(object):
    """Random-scale resize (and optional horizontal flip) for detection data.

    The image is resized so that its short side reaches a size drawn
    uniformly from ``[min(sizes), max(sizes)]``, unless that would push the
    long side beyond ``max_size``, in which case the long side is limited
    instead.  Boxes and ignore regions are scaled (and flipped) along with
    the image.
    """

    def __init__(self, sizes, max_size, flip=False):
        """
        :param sizes: target short-side size, or a list of sizes whose
            min/max bound the random draw
        :param max_size: upper bound for the long side after resizing
        :param flip: if True, flip horizontally with probability 0.5
        """
        if not isinstance(sizes, list):
            sizes = [sizes]
        self.scale_min = min(sizes)
        self.scale_max = max(sizes)
        self.max_size = max_size
        self.flip = flip

    @staticmethod
    def _scale_boxes(boxes, scale):
        """Return a scaled copy of ``boxes``: columns 0/1 floored, 2/3 ceiled."""
        scaled = np.array(boxes)
        # Empty inputs (including 1-D empty arrays) have nothing to scale and
        # cannot be column-indexed.
        if scaled.size > 0:
            scaled[:, 0] = np.floor(scaled[:, 0] * scale)
            scaled[:, 1] = np.floor(scaled[:, 1] * scale)
            scaled[:, 2] = np.ceil(scaled[:, 2] * scale)
            scaled[:, 3] = np.ceil(scaled[:, 3] * scale)
        return scaled

    @staticmethod
    def _flip_boxes(boxes, width):
        """Mirror columns 0 and 2 of ``boxes`` around ``width`` in place."""
        if boxes.size > 0:
            # Both right-hand sides are new arrays, so the swap is safe.
            boxes[:, 0], boxes[:, 2] = width - boxes[:, 2], width - boxes[:, 0]

    def __call__(self, img, bbox, ignores):
        """
        :param img: image exposing ``size`` (w, h), ``resize`` and ``transpose``
        :param bbox: array of boxes, one per row
        :param ignores: array of ignore regions, one per row
        :return: ``(new_img, new_bbox, scale, new_ignores)``
        """
        w, h = img.size
        short = min(w, h)
        large = max(w, h)

        size = np.random.randint(self.scale_min, self.scale_max + 1)
        scale = min(size / short, self.max_size / large)
        new_w, new_h = int(w * scale), int(h * scale)

        new_img = img.resize((new_w, new_h))
        new_bbox = self._scale_boxes(bbox, scale)
        new_ignores = self._scale_boxes(ignores, scale)

        if self.flip:
            if np.random.random() < 0.5:
                new_img = new_img.transpose(Image.FLIP_LEFT_RIGHT)
                self._flip_boxes(new_bbox, new_w)
                self._flip_boxes(new_ignores, new_w)
        return new_img, new_bbox, scale, new_ignores
146 |
--------------------------------------------------------------------------------
/utils/cal_mAP.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #-------------------------------------------
3 | # cal mAP | base on pytorch example dataset
4 | # for cityscapes specifically
5 | # pang jiangmiao | 2018.04.15
6 | #-------------------------------------------
7 | # import sys
8 | import numpy as np
9 | from collections import defaultdict
10 | import subprocess
11 |
12 | # import pprint
13 | # import pdb
14 | import logging
15 | logger = logging.getLogger('global')
def parse_gts(gts_list, num_classes):
    '''Parse the lines of a detection ground-truth list.

    A record starts at a line beginning with ``#``.  Relative to that line:
    ``+1`` image path, ``+3`` height, ``+4`` width, ``+6`` number of ignore
    regions (followed by that many lines), then the number of ground-truth
    boxes followed by that many ``label x1 y1 x2 y2`` lines.

    :param gts_list: list of text lines
    :param num_classes: number of classes; labels index ``gts['num']``
    :return: dict with ``gts['num']`` (per-class box counts) and, per image,
        ``gts[img_name] = {height, width, bbox_num,
        bbox: {cls: [[x1, y1, x2, y2], ...]}, is_det: {cls: zeros(len)}}``
        where ``img_name`` is the file name without directory and without
        its last four characters (the extension)
    '''
    logger.info('Start parsing gts list......')
    gts = defaultdict(list)
    gts['num'] = np.zeros(num_classes)
    for index, line in enumerate(gts_list):
        if not line.startswith('#'):
            continue
        img_name = gts_list[index + 1].strip()  # val/folder/img_name.png
        pure_name = img_name.split('/')[-1][0:-4]  # img_name
        entry = defaultdict(list)
        entry['height'] = gts_list[index + 3].strip()
        entry['width'] = gts_list[index + 4].strip()
        # Ignore regions sit between the header and the ground-truth count;
        # skip them instead of assuming there are none.
        ignore_num = int(gts_list[index + 6])
        count_index = index + 7 + ignore_num
        bbox_num = int(gts_list[count_index])
        entry['bbox_num'] = bbox_num
        entry['bbox'] = defaultdict(list)
        for b in gts_list[count_index + 1:count_index + 1 + bbox_num]:
            b = b.split()
            label = int(b[0])
            # int(float(...)) also accepts coordinates written as "10.0"
            box = [int(float(v)) for v in (b[1], b[2], b[3], b[4])]
            entry['bbox'][label].append(box)
            gts['num'][label] += 1
        entry['is_det'] = defaultdict(list)
        for l in range(1, num_classes):
            entry['is_det'][l] = np.zeros(len(entry['bbox'][l]))
        gts[pure_name] = entry
    logger.info('Done!')
    return gts
47 |
def parse_res(res_list):
    '''Parse detection result lines.

    Each non-blank line is ``img_name x1 y1 x2 y2 score label`` separated by
    whitespace; coordinates are truncated to int.

    :param res_list: list of text lines
    :return: ``dict[cls] = [[x1, y1, x2, y2, score, img_name], ...]``
    '''
    logger.info('Start parsing results list......')
    results = defaultdict(list)
    for line in res_list:
        r = line.split()
        if not r:
            # blank lines (e.g. left over from concatenating files) carry no
            # detection
            continue
        img_name = r[0]  # img_name no extension
        label = int(r[6])
        score = float(r[5])
        box = [int(float(r[1])), int(float(r[2])), int(float(r[3])), int(float(r[4]))]
        results[label].append(box + [score, img_name])
    logger.info('Done!')
    return results
66 |
def calIoU(result, gt_i):
    """Find the ground-truth box that overlaps ``result`` the most.

    Boxes use inclusive pixel coordinates, i.e. width is ``x2 - x1 + 1``.

    :param result: ``[x1, y1, x2, y2, score, img_name]`` (only the first four
        entries are read)
    :param gt_i: ``[[x1, y1, x2, y2], ...]``
    :return: ``(best_iou, index_of_best_gt)``; ``(-1, -1)`` when no
        ground-truth box overlaps
    """
    x1, y1, x2, y2 = result[0], result[1], result[2], result[3]
    area_res = (x2 - x1 + 1) * (y2 - y1 + 1)
    overmax = -1
    is_which = -1
    for k, gt in enumerate(gt_i):
        gt_x1, gt_y1, gt_x2, gt_y2 = gt[0], gt[1], gt[2], gt[3]
        inter_x1 = max(x1, gt_x1)
        inter_y1 = max(y1, gt_y1)
        inter_x2 = min(x2, gt_x2)
        inter_y2 = min(y2, gt_y2)
        # "<=": with inclusive coordinates, equal bounds still share one
        # row/column of pixels, matching the "+ 1" used for the areas below.
        if inter_x1 <= inter_x2 and inter_y1 <= inter_y2:
            area_inter = (inter_x2 - inter_x1 + 1) * (inter_y2 - inter_y1 + 1)
            area_gt = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)
            IoU = area_inter / (area_res + area_gt - area_inter)
            if IoU > overmax:
                overmax = IoU
                is_which = k
    return overmax, is_which
94 |
def cal_mAP(gts, results, num_classes, overlap_thre):
    """Compute per-class average precision and maximum recall.

    Detections of each class are visited in descending score order; a
    detection is a true positive when its best-overlapping ground-truth box
    reaches ``overlap_thre`` and has not been matched yet.  Matching marks
    ``gts[img]['is_det'][cls]`` in place.

    :param gts: structure produced by ``parse_gts``
    :param results: ``dict[cls] = [[x1, y1, x2, y2, score, img_name], ...]``
    :param num_classes: number of classes; class 0 is skipped
    :param overlap_thre: IoU threshold for a match
    :return: ``(ap, max_recall)`` arrays of length ``num_classes``
    """
    ap = np.zeros(num_classes)
    max_recall = np.zeros(num_classes)
    for class_i in range(1, num_classes):
        results_i = sorted(results[class_i], key=lambda det: det[4], reverse=True)
        res_num = len(results_i)
        tp = np.zeros(res_num)
        fp = np.zeros(res_num)
        sum_gt = gts['num'][class_i]
        logger.info('sum_gt: {}'.format(sum_gt))
        for k, res in enumerate(results_i):
            img_name = res[-1]
            matched = False
            # A detection on an image without any annotation record cannot
            # match anything; the membership test also avoids inserting an
            # empty entry into a defaultdict.
            if img_name in gts:
                img_gts = gts[img_name]
                overmax, is_which = calIoU(res, img_gts['bbox'][int(class_i)])
                detected = img_gts['is_det'][class_i]
                if overmax >= overlap_thre and detected[is_which] == 0:
                    detected[is_which] = 1
                    matched = True
            if matched:
                tp[k] = 1
            else:
                fp[k] = 1
        tp = np.cumsum(tp)
        fp = np.cumsum(fp)
        # No ground truth for this class: recall is defined as 0, not 0/0.
        rec = tp / sum_gt if sum_gt > 0 else np.zeros(res_num)
        prec = tp / (tp + fp)
        # make precision monotonically non-increasing (max over the tail)
        prec = np.maximum.accumulate(prec[::-1])[::-1]
        prev_rec = 0.0
        for r, p in zip(rec, prec):
            ap[class_i] += (r - prev_rec) * p
            prev_rec = r
        # np.max raises on an empty array, i.e. when a class has no detections
        max_recall[class_i] = np.max(rec) if res_num > 0 else 0.0
        logger.info('class {} --- ap: {} max recall: {}'.format(class_i, ap[class_i], max_recall[class_i]))
    return ap, max_recall
133 |
134 |
def Cal_MAP1(res_list, gts_list, num_classes):
    """Evaluate in-memory result lines against in-memory ground-truth lines.

    :param res_list: result lines, as accepted by ``parse_res``
    :param gts_list: ground-truth lines, as accepted by ``parse_gts``
    :param num_classes: number of classes (converted with ``int``)
    :return: mean AP over classes ``1 .. num_classes - 1`` at IoU 0.5
    """
    iou_threshold = 0.5
    class_count = int(num_classes)
    parsed_gts = parse_gts(gts_list, class_count)
    parsed_results = parse_res(res_list)

    ap, max_recall = cal_mAP(parsed_gts, parsed_results, class_count, iou_threshold)
    # index 0 is skipped by cal_mAP, so leave it out of the averages
    mAP = np.mean(ap[1:])
    m_rec = np.mean(max_recall[1:])
    logger.info('mAP: {} max recall: {}'.format(mAP, m_rec))
    return mAP
152 |
def Cal_MAP(res_dir, gts_list, num_classes):
    """Merge per-rank result files and print the resulting mAP.

    All ``results.txt.rank*`` files in ``res_dir`` are concatenated into
    ``res_dir/results.txt`` (overwritten), which is then evaluated against
    the ground-truth list file at IoU 0.5.

    :param res_dir: directory holding the ``results.txt.rank*`` files
    :param gts_list: path of the ground-truth list file
    :param num_classes: number of classes
    :return: mean AP over classes ``1 .. num_classes - 1``
    """
    # Function-scope imports keep the block self-contained.
    import glob
    import os
    import shutil

    overlap_thre = 0.5
    num_classes = int(num_classes)
    merged_path = os.path.join(res_dir, 'results.txt')

    # Concatenate in Python instead of "cat ... > ..." through a shell: no
    # quoting problems with unusual directory names and no shell injection.
    pattern = os.path.join(glob.escape(res_dir), 'results.txt.rank*')
    with open(merged_path, 'wb') as merged:
        for rank_file in sorted(glob.glob(pattern)):
            with open(rank_file, 'rb') as part:
                shutil.copyfileobj(part, merged)

    with open(merged_path, 'r', encoding='utf-8') as f_res:
        res_list = f_res.readlines()
    with open(gts_list, 'r', encoding='utf-8') as f_gts:
        gts_list = f_gts.readlines()

    gts = parse_gts(gts_list, num_classes)
    results = parse_res(res_list)

    ap, max_recall = cal_mAP(gts, results, num_classes, overlap_thre)
    mAP = np.mean(ap[1:])
    m_rec = np.mean(max_recall[1:])
    print('--------------------')
    print('mAP: {} max recall: {}'.format(mAP, m_rec))
    print('--------------------')
    return mAP
172 |
173 |
174 |
175 |
--------------------------------------------------------------------------------
/extensions/_cython_bbox/cython_nms.pyx:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2017-present, Facebook, Inc.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | #
16 | # Based on:
17 | # --------------------------------------------------------
18 | # Fast R-CNN
19 | # Copyright (c) 2015 Microsoft
20 | # Licensed under The MIT License [see LICENSE for details]
21 | # Written by Ross Girshick
22 | # --------------------------------------------------------
23 |
24 | cimport cython
25 | import numpy as np
26 | cimport numpy as np
27 |
# GIL-free float32 maximum; replaces the Python builtin inside this module.
cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil:
    if a >= b:
        return a
    return b
30 |
# GIL-free float32 minimum; replaces the Python builtin inside this module.
cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil:
    if a <= b:
        return a
    return b
33 |
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh):
    """Greedy non-maximum suppression.

    dets:   float32 array with one detection per row, columns
            ``x1, y1, x2, y2, score``.
    thresh: a box is suppressed when its IoU with a kept, higher-scoring box
            is ``>= thresh``.

    Returns the indices (into ``dets``, in ascending order) of the boxes that
    were not suppressed.
    """
    cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
    cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
    cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
    cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
    cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]

    # inclusive pixel coordinates, hence the "+ 1"
    cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]

    cdef int ndets = dets.shape[0]
    # np.int_ instead of the alias np.int, which was removed in NumPy 1.24
    cdef np.ndarray[np.int_t, ndim=1] suppressed = \
            np.zeros((ndets), dtype=np.int_)

    # nominal indices
    cdef int _i, _j
    # sorted indices
    cdef int i, j
    # temp variables for box i's (the box currently under consideration)
    cdef np.float32_t ix1, iy1, ix2, iy2, iarea
    # variables for computing overlap with box j (lower scoring box)
    cdef np.float32_t xx1, yy1, xx2, yy2
    cdef np.float32_t w, h
    cdef np.float32_t inter, ovr

    with nogil:
        for _i in range(ndets):
            i = order[_i]
            if suppressed[i] == 1:
                continue
            ix1 = x1[i]
            iy1 = y1[i]
            ix2 = x2[i]
            iy2 = y2[i]
            iarea = areas[i]
            for _j in range(_i + 1, ndets):
                j = order[_j]
                if suppressed[j] == 1:
                    continue
                xx1 = max(ix1, x1[j])
                yy1 = max(iy1, y1[j])
                xx2 = min(ix2, x2[j])
                yy2 = min(iy2, y2[j])
                w = max(0.0, xx2 - xx1 + 1)
                h = max(0.0, yy2 - yy1 + 1)
                inter = w * h
                ovr = inter / (iarea + areas[j] - inter)
                if ovr >= thresh:
                    suppressed[j] = 1

    return np.where(suppressed == 0)[0]
88 |
89 | # ----------------------------------------------------------
90 | # Soft-NMS: Improving Object Detection With One Line of Code
91 | # Copyright (c) University of Maryland, College Park
92 | # Licensed under The MIT License [see LICENSE for details]
93 | # Written by Navaneeth Bodla and Bharat Singh
94 | # ----------------------------------------------------------
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def soft_nms(
    np.ndarray[float, ndim=2] boxes_in,
    float sigma=0.5,
    float Nt=0.3,
    float threshold=0.001,
    unsigned int method=0
):
    """Soft-NMS on a copy of ``boxes_in``.

    boxes_in:  float array with one box per row, columns
               ``x1, y1, x2, y2, score``; it is copied, not modified.
    sigma:     divisor used by the gaussian re-weighting (method 2).
    Nt:        IoU threshold used by the linear (1) and hard (other) methods.
    threshold: boxes whose re-weighted score drops below this are discarded.
    method:    1 = linear decay, 2 = gaussian decay, anything else = hard NMS.

    Returns ``(boxes[:N], inds[:N])``: the surviving rows (scores updated, in
    selection order) and their row indices in ``boxes_in``.
    """
    boxes = boxes_in.copy()
    cdef unsigned int N = boxes.shape[0]
    cdef float iw, ih, box_area
    cdef float ua
    cdef int pos = 0
    cdef float maxscore = 0
    cdef int maxpos = 0
    cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
    # inds[k] tracks which input row currently sits at position k
    inds = np.arange(N)

    # Selection-sort style sweep: position i receives the highest-scoring
    # remaining box, then every later box is re-weighted against it.
    for i in range(N):
        maxscore = boxes[i, 4]
        maxpos = i

        # remember box i so it can be swapped with the max box below
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]
        ti = inds[i]

        pos = i + 1
        # get max box
        while pos < N:
            if maxscore < boxes[pos, 4]:
                maxscore = boxes[pos, 4]
                maxpos = pos
            pos = pos + 1

        # add max box as a detection
        boxes[i,0] = boxes[maxpos,0]
        boxes[i,1] = boxes[maxpos,1]
        boxes[i,2] = boxes[maxpos,2]
        boxes[i,3] = boxes[maxpos,3]
        boxes[i,4] = boxes[maxpos,4]
        inds[i] = inds[maxpos]

        # swap ith box with position of max box
        boxes[maxpos,0] = tx1
        boxes[maxpos,1] = ty1
        boxes[maxpos,2] = tx2
        boxes[maxpos,3] = ty2
        boxes[maxpos,4] = ts
        inds[maxpos] = ti

        # from here on (tx1, ty1, tx2, ty2, ts) describe the selected max box
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]

        pos = i + 1
        # NMS iterations, note that N changes if detection boxes fall below
        # threshold
        while pos < N:
            x1 = boxes[pos, 0]
            y1 = boxes[pos, 1]
            x2 = boxes[pos, 2]
            y2 = boxes[pos, 3]
            s = boxes[pos, 4]

            # inclusive pixel coordinates, hence the "+ 1"
            area = (x2 - x1 + 1) * (y2 - y1 + 1)
            # min/max are the module-level float32 helpers of this file
            iw = (min(tx2, x2) - max(tx1, x1) + 1)
            if iw > 0:
                ih = (min(ty2, y2) - max(ty1, y1) + 1)
                if ih > 0:
                    # union area = area(max box) + area(this box) - intersection
                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
                    ov = iw * ih / ua #iou between max box and detection box

                    if method == 1: # linear
                        if ov > Nt:
                            weight = 1 - ov
                        else:
                            weight = 1
                    elif method == 2: # gaussian
                        weight = np.exp(-(ov * ov)/sigma)
                    else: # original NMS
                        if ov > Nt:
                            weight = 0
                        else:
                            weight = 1

                    boxes[pos, 4] = weight*boxes[pos, 4]

                    # if box score falls below threshold, discard the box by
                    # swapping with last box update N
                    if boxes[pos, 4] < threshold:
                        boxes[pos,0] = boxes[N-1, 0]
                        boxes[pos,1] = boxes[N-1, 1]
                        boxes[pos,2] = boxes[N-1, 2]
                        boxes[pos,3] = boxes[N-1, 3]
                        boxes[pos,4] = boxes[N-1, 4]
                        inds[pos] = inds[N-1]
                        N = N - 1
                        # step back so the box just moved into `pos` is
                        # examined on the next iteration
                        pos = pos - 1

            pos = pos + 1

    return boxes[:N], inds[:N]
204 |
--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 18-4-19
3 | # @Author : Xinge
4 | # import torch
5 | import torch.nn as nn
6 | # import numpy as np
7 | import torch
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable, Function
10 | # import numpy as np
11 | from math import exp
12 |
13 |
class Losses(nn.Module):
    """KL-divergence style loss between two batches of logits."""

    def __init__(self):
        super(Losses, self).__init__()

    def forward(self, input1, input2):
        """Return ``KL(softmax(input2) || softmax(input1))`` scaled by batch.

        The element-wise KL terms are averaged over all elements and the mean
        is multiplied by the batch size ``input1.size(0)``.

        :param input1: logits of the approximating distribution, (N, C)
        :param input2: logits of the reference distribution, (N, C)
        :return: scalar loss tensor
        """
        log_q = nn.functional.log_softmax(input1, dim=1)
        # log_softmax is used for the reference side too: it stays finite
        # where softmax(...).log() would underflow to -inf.
        log_p = nn.functional.log_softmax(input2, dim=1)
        p = log_p.exp()
        # log_q is already a log-probability.  Taking its log again (as the
        # previous code did) takes the log of non-positive numbers and makes
        # the loss NaN.
        final_loss = (p * (log_p - log_q)).mean()
        return final_loss * input1.size(0)
34 |
class Losses_triplet(nn.Module):
    """Triplet-style loss built from two KL-divergence terms."""

    def __init__(self):
        super(Losses_triplet, self).__init__()
        self.loss = nn.functional.kl_div

    def forward(self, real_img, input1, input2):
        """
        KL divergence based triplet loss:
        ``1000 * KL(real, input2) + max(0, 1 - 1000 * KL(real, input1))``.

        :param real_img: real source (logits, softmax-ed here as the target)
        :param input1: fake source (logits); pushed away from ``real_img``
        :param input2: fake target (logits); pulled towards ``real_img``
        :return: scalar loss tensor
        """
        input1_log = nn.functional.log_softmax(input1, dim=1)
        input2_log = nn.functional.log_softmax(input2, dim=1)
        real_img = nn.functional.softmax(real_img, dim=1)
        positive_loss = self.loss(input2_log, real_img, size_average=True) * 1000.0
        negative_loss = 1.0 - self.loss(input1_log, real_img, size_average=True) * 1000.0
        # Hinge at zero.  clamp works on any device and keeps the tensor
        # shape; the former ".data = torch.cuda.FloatTensor([0.0])" required
        # CUDA, changed the shape, and left the gradient of the clipped term
        # flowing.
        negative_loss = torch.clamp(negative_loss, min=0.0)
        return positive_loss + negative_loss
65 |
class Losses_triplet_nll(nn.Module):
    """Triplet loss as the negative log-likelihood of a 2-way softmax over
    mean-squared-error distances."""

    def __init__(self):
        super(Losses_triplet_nll, self).__init__()
        self.loss = nn.functional.mse_loss

    def forward(self, real_img, input1, input2):
        """
        With ``p = mse(input2, real_img)`` and ``n = mse(input1, real_img)``
        the loss is ``-log(exp(n) / (exp(n) + exp(p)))``.

        :param input1: fake source (negative sample)
        :param input2: fake target (positive sample)
        :param real_img: real source (anchor)
        :return: scalar loss tensor
        """
        posi_dist = self.loss(input2, real_img)
        nega_dist = self.loss(input1, real_img)

        # -log(e^n / (e^n + e^p)) == log(1 + e^(p - n)) == softplus(p - n).
        # The softplus form does not overflow for large distances, whereas
        # exponentiating the raw distances yields inf / inf = NaN.
        return nn.functional.softplus(posi_dist - nega_dist)
90 |
91 |
class GradReverse(Function):
    """Gradient-reversal autograd function.

    The forward pass returns a view of the input unchanged; the backward
    pass multiplies the incoming gradient by ``-lambd``.

    NOTE(review): this is written in the instance-based (non-static
    ``forward``/``backward``) autograd style; confirm it is supported by the
    PyTorch version in use.
    """

    def __init__(self, lambd):
        # scale applied (with flipped sign) to the gradient in backward
        self.lambd = lambd

    def forward(self, x):
        # identity: a view of x with x's own shape
        return x.view_as(x)

    def backward(self, grad_output):
        # reverse and scale the gradient
        return (grad_output * -self.lambd)
102 |
103 |
def grad_reverse(x, lambd):
    """Apply ``GradReverse`` with scale ``lambd`` to ``x`` and return the result."""
    reverse_op = GradReverse(lambd)
    return reverse_op(x)
106 |
107 |
class Losses3(nn.Module):
    """Batch-averaged KL divergence between two batches of logits."""

    def __init__(self):
        super(Losses3, self).__init__()

    def forward(self, input1, input2):
        """
        KL divergence loss: ``sum(p * (log p - log q)) / N`` with
        ``p = softmax(input2)``, ``log q = log_softmax(input1)`` and ``N`` the
        batch size.

        :param input1: logits of the approximating distribution
        :param input2: logits of the reference distribution
        :return: scalar loss tensor
        """
        log_q = nn.functional.log_softmax(input1, dim=1)
        p = nn.functional.softmax(input2, dim=1)
        batch_size = log_q.size(0)
        pointwise_kl = p * (p.log() - log_q)
        return pointwise_kl.sum() / batch_size
127 |
class Losses2(nn.Module):
    """Thin wrapper around a bias-free ``nn.Bilinear`` layer."""

    def __init__(self, in1_size, in2_size, out_size):
        super(Losses2, self).__init__()
        # fourth positional argument of nn.Bilinear is the bias flag
        bilinear = nn.Bilinear(in1_size, in2_size, out_size, False)
        self.loss = bilinear

    def forward(self, input1, input2):
        """
        Bilinear Transform Loss
        :param input1: (N, in1_size)
        :param input2: (N, in2_size)
        :return: (N, out_size)
        """
        output = self.loss(input1, input2)
        return output
141 |
142 |
143 |
144 |
def gaussian(window_size, sigma):
    """Return a 1-D gaussian kernel of ``window_size`` taps that sums to 1.

    The kernel is centred on index ``window_size // 2``.
    """
    center = window_size // 2
    two_sigma_sq = float(2 * sigma ** 2)
    taps = []
    for pos in range(window_size):
        taps.append(exp(-(pos - center) ** 2 / two_sigma_sq))
    kernel = torch.Tensor(taps)
    return kernel / kernel.sum()
148 |
149 |
def create_window(window_size, channel):
    """Build a ``(channel, 1, window_size, window_size)`` gaussian window.

    The 2-D kernel is the outer product of the 1-D ``gaussian`` kernel
    (sigma 1.5) with itself, repeated for every channel and wrapped in a
    ``Variable``.
    """
    column = gaussian(window_size, 1.5).unsqueeze(1)
    kernel_2d = column.mm(column.t()).float()
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)
    per_channel = kernel_4d.expand(channel, 1, window_size, window_size)
    return Variable(per_channel.contiguous())
155 |
156 |
157 | def _ssim(img1, img2, window, window_size, channel, size_average=True):
158 | mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
159 | mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
160 |
161 | mu1_sq = mu1.pow(2)
162 | mu2_sq = mu2.pow(2)
163 | mu1_mu2 = mu1 * mu2
164 |
165 | sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
166 | sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
167 | sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
168 |
169 | C1 = 0.01 ** 2
170 | C2 = 0.03 ** 2
171 |
172 | ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
173 |
174 | if size_average:
175 | return ssim_map.mean()
176 | else:
177 | return ssim_map.mean(1).mean(1).mean(1)
178 |
179 |
class SSIM(torch.nn.Module):
    """Module wrapper around ``_ssim`` that caches its gaussian window."""

    def __init__(self, window_size=110, size_average=True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        # start with a single-channel window; forward rebuilds it on demand
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """Return the SSIM of ``img1`` and ``img2`` (4-D batches)."""
        (_, channel, _, _) = img1.size()

        cache_is_valid = (channel == self.channel
                          and self.window.data.type() == img1.data.type())
        if not cache_is_valid:
            # channel count or tensor type changed: rebuild the window on the
            # input's device/type and remember it for later calls
            fresh = create_window(self.window_size, channel)
            if img1.is_cuda:
                fresh = fresh.cuda(img1.get_device())
            fresh = fresh.type_as(img1)
            self.window = fresh
            self.channel = channel

        return _ssim(img1, img2, self.window, self.window_size, channel, self.size_average)
204 |
205 |
def ssim(img1, img2, window_size=110, size_average=True):
    """Functional SSIM: build a window matching ``img1`` and call ``_ssim``."""
    channel = img1.size()[1]
    # enforce the same 4-D requirement as a 4-way unpack of the size
    (_, _, _, _) = img1.size()
    kernel = create_window(window_size, channel)

    if img1.is_cuda:
        kernel = kernel.cuda(img1.get_device())
    kernel = kernel.type_as(img1)

    return _ssim(img1, img2, kernel, window_size, channel, size_average)
215 |
216 |
--------------------------------------------------------------------------------
/models/faster_rcnn/vgg_adver_expansion_cluster.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 18-6-22 9:30
3 | # @Author : Xinge
4 |
5 | from extensions import RoIPool
6 | from .faster_rcnn_adver_expansion_reweight_cluster import FasterRCNN_AdEx
7 | from models.head import NaiveRpnHead
8 | import torch.nn as nn
9 | import torch.utils.model_zoo as model_zoo
10 | import math
11 | # from .common_net import LayerNorm
12 |
# Public names exported by this module: the VGG class and its factory
# functions.
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


# Checkpoint URLs (download.pytorch.org) keyed by architecture name; read by
# the factory functions below when ``pretrained=True``.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
29 |
class VGG(FasterRCNN_AdEx):
    """VGG backbone with an RPN head and an RCNN head on top of
    ``FasterRCNN_AdEx``.

    ``cfg`` (constructor argument, distinct from the module-level ``cfg``
    layer table) is read for the keys ``'gan_model_flag'``,
    ``'anchor_scales'``, ``'anchor_ratios'``, ``'anchor_stride'`` and
    ``'num_classes'``.
    """

    def __init__(self, features, cfg):
        """
        :param features: convolutional feature extractor (e.g. the result of
            ``make_layers``); its last registered sub-module is removed here
        :param cfg: configuration dict, see the class docstring
        """
        super(VGG, self).__init__(cfg['gan_model_flag'])

        self.features = features
        # self.features2 = features
        #drop out last pooling layer so that feature stride is 2^4
        # (the popped item itself is not used afterwards)
        last_pooling = self.features._modules.popitem(last = True)
        # last_pooling2 = self.features2._modules.popitem(last = True)
        # rpn head
        # one anchor per (scale, ratio) combination
        num_anchors = len(cfg['anchor_scales']) * len(cfg['anchor_ratios'])
        self.rpn_head = NaiveRpnHead(512, num_classes=2, num_anchors=num_anchors)

        # rcnn head
        self.roipooling = RoIPool(7, 7, 1.0 / cfg['anchor_stride'])
        # two fully connected layers over the flattened 512 x 7 x 7 RoI feature
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            # nn.BatchNorm1d(num_features=4096),
            # LayerNorm(4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            # LayerNorm(4096),
            # nn.BatchNorm1d(num_features=4096),
            nn.ReLU(True),
            nn.Dropout(),
            # nn.Linear(4096, num_classes),
        )
        # per-RoI class scores and 4 box regression values per class
        self.fc_rcnn_cls = nn.Linear(4096, cfg['num_classes'])
        self.fc_rcnn_loc = nn.Linear(4096, cfg['num_classes'] * 4)

        self._initialize_weights()

    def feature_extractor(self, x):
        """Run the convolutional backbone on ``x``."""
        return self.features(x)

    # def feature_extractor2(self, x):
    #     return self.features2(x)

    def rpn(self, x):
        """Run the RPN head on the feature map ``x``."""
        return self.rpn_head(x)

    def rcnn(self, x, rois):
        """Pool RoI features and predict class scores and box offsets.

        :param x: feature map
        :param rois: RoIs with 5 columns (asserted)
        :return: ``(x_fea, rcnn_pred_cls, rcnn_pred_loc)`` -- the 4096-d RoI
            feature and the outputs of the two prediction layers
        """
        assert(rois.shape[1] == 5)
        x = self.roipooling(x, rois) # x.size(): [512, 512, 7, 7]
        x = x.view(x.size(0), -1)
        x_fea = self.classifier(x) # torch.Size([512, 4096])
        rcnn_pred_cls = self.fc_rcnn_cls(x_fea)
        rcnn_pred_loc = self.fc_rcnn_loc(x_fea)
        return x_fea, rcnn_pred_cls, rcnn_pred_loc

    def _initialize_weights(self):
        """Initialise every Conv2d, BatchNorm2d and Linear sub-module."""
        # count = 1
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # normal init with std sqrt(2 / (kh * kw * out_channels))
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
                # if count <= 2:
                #     m.eval()
                #     count += 1
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
99 |
100 |
def make_layers(cfg, batch_norm=False):
    """Build the VGG convolutional stack described by ``cfg``.

    :param cfg: sequence where ``'M'`` adds a 2x2 stride-2 max-pool and any
        other entry adds a 3x3 (padding 1) convolution with that many output
        channels, followed by an in-place ReLU
    :param batch_norm: insert ``BatchNorm2d`` between each convolution and
        its ReLU
    :return: ``nn.Sequential`` of the layers; the first convolution takes 3
        input channels
    """
    modules = []
    channels_in = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels_in, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        channels_in = spec
    return nn.Sequential(*modules)
115 |
116 |
# Layer tables consumed by make_layers: an integer is the output channel
# count of a convolution, 'M' is a max-pooling layer.  The factory functions
# below use 'A' for vgg11, 'B' for vgg13, 'D' for vgg16 and 'E' for vgg19.
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
123 |
124 |
def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg11']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['A'])
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
    return net
135 |
136 |
def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg11_bn']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['A'], batch_norm=True)
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
    return net
147 |
148 |
def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg13']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['B'])
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
    return net
159 |
160 |
def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg13_bn']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['B'], batch_norm=True)
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
    return net
171 |
172 |
def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg16']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['D'])
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg16']))
    return net
183 |
184 |
def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization

    Args:
        pretrained (bool): If True, load the state dict published at
            ``model_urls['vgg16_bn']`` into the model
        **kwargs: forwarded to the ``VGG`` constructor
    """
    backbone = make_layers(cfg['D'], batch_norm=True)
    net = VGG(backbone, **kwargs)
    if not pretrained:
        return net
    net.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
    return net
195 |
196 |
def vgg19(pretrained=False, **kwargs):
    """Build the 19-layer VGG network (configuration "E").

    Args:
        pretrained (bool): when True, the weights published under
            ``model_urls['vgg19']`` (pre-trained on ImageNet) are fetched
            and loaded into the model before it is returned.
        **kwargs: forwarded unchanged to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` instance.
    """
    feature_layers = make_layers(cfg['E'])
    net = VGG(feature_layers, **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['vgg19'])
    net.load_state_dict(weights)
    return net
207 |
208 |
def vgg19_bn(pretrained=False, **kwargs):
    """Build the 19-layer VGG network (configuration 'E') with batch normalization.

    Args:
        pretrained (bool): when True, the weights published under
            ``model_urls['vgg19_bn']`` (pre-trained on ImageNet) are fetched
            and loaded into the model before it is returned.
        **kwargs: forwarded unchanged to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` instance.
    """
    feature_layers = make_layers(cfg['E'], batch_norm=True)
    net = VGG(feature_layers, **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['vgg19_bn'])
    net.load_state_dict(weights)
    return net
219 |
--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
1 | #ifdef __cplusplus
2 | extern "C" {
3 | #endif
4 |
5 | #include
6 | #include
7 | #include
8 | #include "roi_align_kernel.h"
9 |
// Loop header that walks the range [0, n) from inside a kernel: each thread
// starts at its global 1-D index (blockIdx.x * blockDim.x + threadIdx.x) and
// advances by the total number of launched threads (blockDim.x * gridDim.x),
// so any 1-D launch configuration covers every index.
// `i` is declared by the macro; `n` is parenthesised so that an expression
// argument (e.g. `a ? b : c`) cannot change the meaning of the comparison.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)
13 |
14 |
/*
 * ROI-Align forward pass.
 *
 * One loop iteration produces one output element top_data[index]; the flat
 * index is decomposed as (n, c, ph, pw) = (roi, channel, output row, output
 * column). Every ROI occupies 5 consecutive floats in bottom_rois:
 * (batch_index, x1, y1, x2, y2); the four coordinates are multiplied by
 * spatial_scale before use. bottom_data is addressed as
 * ((batch * channels + c) * height + y) * width + x.
 *
 * The sampling point for (ph, pw) is bilinearly interpolated from its four
 * neighbouring input pixels; a sampling point outside [0, height) x
 * [0, width) yields 0.
 *
 * NOTE(review): aligned_height == 1 or aligned_width == 1 makes the bin size
 * a division by zero, and height < 2 or width < 2 makes hstart/wstart
 * negative. Neither case is handled here; callers must avoid them.
 */
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
    CUDA_1D_KERNEL_LOOP(index, nthreads) {
        // (n, c, ph, pw) is an element in the aligned output
        int pw = index % aligned_width;
        int ph = (index / aligned_width) % aligned_height;
        int c  = (index / aligned_width / aligned_height) % channels;
        int n  = index / aligned_width / aligned_height / channels;

        const float* roi = bottom_rois + n * 5;

        // The batch index is stored as a float; convert it once so the image
        // offset below is computed in integer arithmetic. Doing the product
        // in float loses exactness once it exceeds 2^24.
        int roi_batch_ind = (int)roi[0];
        float roi_start_w = roi[1] * spatial_scale;
        float roi_start_h = roi[2] * spatial_scale;
        float roi_end_w = roi[3] * spatial_scale;
        float roi_end_h = roi[4] * spatial_scale;

        // Clamp the extent of malformed (inverted) ROIs to zero.
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        // The aligned_* sampling points span the ROI including both ends,
        // hence the (aligned - 1) divisor.
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        // Sampling location in input-feature-map coordinates.
        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        // Top-left neighbour, capped so that the +1 row/column stays inside.
        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (h < 0 || h >= height || w < 0 || w >= width) {
            top_data[index] = 0.;
        } else {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                + bottom_data[upright] * (1. - h_ratio) * w_ratio
                + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                + bottom_data[downright] * h_ratio * w_ratio;
        }
    }
}
71 |
72 |
/*
 * Host-side launcher for ROIAlignForward on the given stream.
 *
 * One thread-loop iteration is scheduled per output element
 * (num_rois * channels * aligned_height * aligned_width), in blocks of 1024
 * threads. Returns 1 on success. A launch error is printed to stderr and
 * terminates the process with exit(-1).
 */
int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
                           const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
    const int kThreadsPerBlock = 1024;
    const int output_size = num_rois * aligned_height * aligned_width * channels;
    cudaError_t err;

    // Nothing to compute (e.g. no ROIs). A grid of zero blocks is an invalid
    // launch configuration and would trip the error path below, killing the
    // process for what is a legitimate empty input.
    if (output_size == 0) {
        return 1;
    }

    const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;

    ROIAlignForward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
        output_size, bottom_data, spatial_scale, height, width, channels,
        aligned_height, aligned_width, bottom_rois, top_data);

    // Only launch/configuration errors are visible here; the kernel itself
    // runs asynchronously on `stream`.
    err = cudaGetLastError();
    if(cudaSuccess != err) {
        fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
92 |
93 |
/*
 * ROI-Align backward pass.
 *
 * One loop iteration handles one output-gradient element top_diff[index],
 * decomposed as (n, c, ph, pw) = (roi, channel, output row, output column).
 * It recomputes the sampling point and the four bilinear weights used for
 * that element, then accumulates top_diff[index] times each weight into the
 * corresponding four input pixels of bottom_diff. Several output elements can
 * hit the same input pixel, hence atomicAdd. Sampling points outside
 * [0, height) x [0, width) contribute nothing.
 *
 * bottom_diff is only accumulated into, never cleared, by this kernel.
 *
 * NOTE(review): aligned_height == 1 or aligned_width == 1 makes the bin size
 * a division by zero, and height < 2 or width < 2 makes hstart/wstart
 * negative. Neither case is handled here; callers must avoid them.
 */
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
                                 const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
    CUDA_1D_KERNEL_LOOP(index, nthreads) {

        // (n, c, ph, pw) is an element in the aligned output
        int pw = index % aligned_width;
        int ph = (index / aligned_width) % aligned_height;
        int c  = (index / aligned_width / aligned_height) % channels;
        int n  = index / aligned_width / aligned_height / channels;

        // Each ROI is 5 floats: (batch_index, x1, y1, x2, y2).
        const float* roi = bottom_rois + n * 5;

        // The batch index is stored as a float; convert it once so the image
        // offset below is computed in integer arithmetic. Doing the product
        // in float loses exactness once it exceeds 2^24.
        int roi_batch_ind = (int)roi[0];
        float roi_start_w = roi[1] * spatial_scale;
        float roi_start_h = roi[2] * spatial_scale;
        float roi_end_w = roi[3] * spatial_scale;
        float roi_end_h = roi[4] * spatial_scale;

        // Clamp the extent of malformed (inverted) ROIs to zero.
        float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
        float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
        float bin_size_h = roi_height / (aligned_height - 1.);
        float bin_size_w = roi_width / (aligned_width - 1.);

        // Sampling location in input-feature-map coordinates.
        float h = (float)(ph) * bin_size_h + roi_start_h;
        float w = (float)(pw) * bin_size_w + roi_start_w;

        // Top-left neighbour, capped so that the +1 row/column stays inside.
        int hstart = fminf(floor(h), height - 2);
        int wstart = fminf(floor(w), width - 2);

        int img_start = roi_batch_ind * channels * height * width;

        // bilinear interpolation
        if (!(h < 0 || h >= height || w < 0 || w >= width)) {
            float h_ratio = h - (float)(hstart);
            float w_ratio = w - (float)(wstart);
            int upleft = img_start + (c * height + hstart) * width + wstart;
            int upright = upleft + 1;
            int downleft = upleft + width;
            int downright = downleft + 1;

            atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
            atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
            atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
            atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
        }
    }
}
144 |
/*
 * Host-side launcher for ROIAlignBackward on the given stream.
 *
 * One thread-loop iteration is scheduled per output-gradient element
 * (num_rois * channels * aligned_height * aligned_width), in blocks of 1024
 * threads. `batch_size` is accepted for interface compatibility but is not
 * used. Returns 1 on success. A launch error is printed to stderr and
 * terminates the process with exit(-1).
 */
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
                            const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
    const int kThreadsPerBlock = 1024;
    const int output_size = num_rois * aligned_height * aligned_width * channels;
    cudaError_t err;

    // Nothing to propagate (e.g. no ROIs). A grid of zero blocks is an
    // invalid launch configuration and would trip the error path below,
    // killing the process for what is a legitimate empty input.
    if (output_size == 0) {
        return 1;
    }

    const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;

    ROIAlignBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
        output_size, top_diff, spatial_scale, height, width, channels,
        aligned_height, aligned_width, bottom_diff, bottom_rois);

    // Only launch/configuration errors are visible here; the kernel itself
    // runs asynchronously on `stream`.
    err = cudaGetLastError();
    if(cudaSuccess != err) {
        fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
163 |
164 |
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 |
--------------------------------------------------------------------------------
/models/faster_rcnn/test_module.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # @Time : 18-6-23 1:48
3 | # @Author : Xinge
4 |
5 |
6 | import torch.nn as nn
7 | import torch
8 | # from torch.autograd import Variable
9 | from common_net import *
10 | import torch.nn.functional as F
11 |
class GAN_dis_AE(nn.Module):
    """Two independent convolutional discriminators, one per input stream.

    ``params`` is indexed with these keys:
        'ch'          -- output channels of the first convolution (e.g. 32)
        'input_dim_a' -- input channels, used for BOTH branches (e.g. 3)
        'n_layer'     -- depth setting (e.g. 5); ``n_layer - 1``
                         channel-doubling stages follow the first convolution
    """

    def __init__(self, params):
        super(GAN_dis_AE, self).__init__()
        base_ch = params['ch']
        in_ch = params['input_dim_a']
        depth = params['n_layer'] - 1

        # Branch A is fully built and initialised before branch B so that
        # the two branches never share layers or weights.
        for attr in ('model_A', 'model_B'):
            branch = self._make_net(base_ch, in_ch, depth)
            branch.apply(gaussian_weights_init)
            setattr(self, attr, branch)

    def _make_net(self, ch, input_dim, n_layer):
        """Assemble one discriminator branch as an ``nn.Sequential``.

        Layout: one stride-2 conv (input_dim -> ch), then ``n_layer``
        stride-2 convs that each double the channel count, then a 1x1
        convolution down to a single output channel.
        """
        layers = [LeakyReLUConv2d(input_dim, ch, kernel_size=3, stride=2, padding=1)]
        width = ch
        for _ in range(n_layer):
            doubled = width * 2
            layers.append(LeakyReLUConv2d(width, doubled, kernel_size=3, stride=2, padding=1))
            width = doubled
        layers.append(nn.Conv2d(width, 1, kernel_size=1, stride=1, padding=0))
        return nn.Sequential(*layers)

    def forward(self, x_aa, x_bb):
        """Score both inputs with their own branch.

        :param x_aa: batch fed to ``model_A``
        :param x_bb: batch fed to ``model_B``
        :return: ``(out_A, out_B)``, each branch output flattened to
                 ``(batch, -1)``
        """
        flattened = []
        for branch, batch in ((self.model_A, x_aa), (self.model_B, x_bb)):
            score = branch(batch)
            flattened.append(score.view(score.size(0), -1))
        return flattened[0], flattened[1]
51 |
52 |
53 |
class GAN_dis_AE_patch(nn.Module):
    """Patch-level discriminator: one ``ResDis_cluster`` block plus a sigmoid.

    The block is created with fixed settings (n_in=128, n_out=256, 3x3 kernel,
    stride 2, padding 1, w=64, h=64) and keeps whatever initialisation
    ``ResDis_cluster`` applies itself.
    """

    def __init__(self):
        super(GAN_dis_AE_patch, self).__init__()
        # Original author's note: used for the source domain only.
        self.model_A_patch = nn.Sequential(
            ResDis_cluster(n_in=128, n_out=256, kernel_size=3, stride=2, padding=1, w=64, h=64)
        )

    def forward(self, rois_features):
        """Return the sigmoid of the discriminator output for ``rois_features``."""
        return torch.sigmoid(self.model_A_patch(rois_features))
66 |
67 | # class GAN_dis_AE_patch_tar(nn.Module):
68 | # def __init__(self):
69 | # super(GAN_dis_AE_patch_tar, self).__init__()
70 | # # for source domain only
71 | # model_A_patch = [ResDis_cluster(n_in=512, n_out=512, kernel_size=3, stride=2, padding=1, w=64, h=64)]
72 | # self.model_A_patch = nn.Sequential(*model_A_patch)
73 | # self.model_A_patch.apply(gaussian_weights_init)
74 | #
75 | # def forward(self, rois_features):
76 | # out_C = self.model_A_patch(rois_features)
77 | # out_C = torch.sigmoid(out_C)
78 | # return out_C
79 |
class GAN_decoder_AE(nn.Module):
    """Two structurally identical decoders (A and B) with separate weights.

    ``params`` is indexed with these keys:
        'input_dim_b'       -- output channels of both decoders
        'ch'                -- channel width of the residual blocks (e.g. 32)
        'n_gen_res_blk'     -- number of ``INSResBlock`` stages (e.g. 4)
        'n_gen_front_blk'   -- ``n_gen_front_blk - 1`` channel-halving
                               transposed-conv stages follow (e.g. 3)
        'res_dropout_ratio' -- optional, defaults to 0
        'neww', 'newh'      -- optional, default to 64 each

    This variant uses stride-1 ``LeakyReLUConvTranspose2d_2`` stages.
    """

    def __init__(self, params):
        super(GAN_decoder_AE, self).__init__()
        out_dim = params['input_dim_b']
        width = params['ch']
        n_res = params['n_gen_res_blk']
        n_front = params['n_gen_front_blk']
        dropout = params['res_dropout_ratio'] if 'res_dropout_ratio' in params.keys() else 0
        neww = params['neww'] if 'neww' in params.keys() else 64
        newh = params['newh'] if 'newh' in params.keys() else 64

        stack_b, stack_a = [], []

        def add_pair(layer_cls, *args, **kwargs):
            # Instantiate the same layer twice: first for B, then for A.
            stack_b.append(layer_cls(*args, **kwargs))
            stack_a.append(layer_cls(*args, **kwargs))

        # NOTE(review): the entry block is created with a hard-coded 128 while
        # the following blocks use params['ch'] -- confirm the two agree.
        add_pair(LinUnsRes_cluster, 128, neww, newh)

        for _ in range(n_res):
            add_pair(INSResBlock, width, width, dropout=dropout)

        for _ in range(n_front - 1):
            add_pair(LeakyReLUConvTranspose2d_2, width, width // 2,
                     kernel_size=3, stride=1, padding=1, output_padding=0)
            width = width // 2

        add_pair(nn.ConvTranspose2d, width, out_dim, kernel_size=1, stride=1, padding=0)
        add_pair(nn.Tanh)

        self.decode_B = nn.Sequential(*stack_b)
        self.decode_B.apply(gaussian_weights_init)
        self.decode_A = nn.Sequential(*stack_a)
        self.decode_A.apply(gaussian_weights_init)

    def forward(self, x_aa, x_bb):
        """Decode ``x_aa`` with decoder A and ``x_bb`` with decoder B.

        Returns the pair ``(decode_A(x_aa), decode_B(x_bb))``.
        """
        decoded_a = self.decode_A(x_aa)
        decoded_b = self.decode_B(x_bb)
        return decoded_a, decoded_b
139 |
class GAN_decoder_AE_de(nn.Module):
    """Two structurally identical upsampling decoders (A and B), separate weights.

    ``params`` is indexed with these keys:
        'input_dim_b'       -- output channels of both decoders
        'ch'                -- channel width of the residual blocks (e.g. 32)
        'n_gen_res_blk'     -- number of ``INSResBlock`` stages (e.g. 3)
        'n_gen_front_blk'   -- ``n_gen_front_blk - 1`` channel-halving
                               transposed-conv stages follow (e.g. 4)
        'res_dropout_ratio' -- optional, defaults to 0
        'neww', 'newh'      -- optional, default to 64 each

    This variant uses stride-2 ``LeakyReLUConvTranspose2d`` stages
    (output_padding=1).
    """

    def __init__(self, params):
        super(GAN_decoder_AE_de, self).__init__()
        out_dim = params['input_dim_b']
        width = params['ch']
        n_res = params['n_gen_res_blk']
        n_front = params['n_gen_front_blk']
        dropout = params['res_dropout_ratio'] if 'res_dropout_ratio' in params.keys() else 0
        neww = params['neww'] if 'neww' in params.keys() else 64
        newh = params['newh'] if 'newh' in params.keys() else 64

        stack_b, stack_a = [], []

        def add_pair(layer_cls, *args, **kwargs):
            # Instantiate the same layer twice: first for B, then for A.
            stack_b.append(layer_cls(*args, **kwargs))
            stack_a.append(layer_cls(*args, **kwargs))

        # NOTE(review): the entry block is created with a hard-coded 128 while
        # the following blocks use params['ch'] -- confirm the two agree.
        add_pair(LinUnsRes_cluster, 128, neww, newh)

        for _ in range(n_res):
            add_pair(INSResBlock, width, width, dropout=dropout)

        for _ in range(n_front - 1):
            add_pair(LeakyReLUConvTranspose2d, width, width // 2,
                     kernel_size=3, stride=2, padding=1, output_padding=1)
            width = width // 2

        add_pair(nn.ConvTranspose2d, width, out_dim, kernel_size=1, stride=1, padding=0)
        add_pair(nn.Tanh)

        self.decode_B = nn.Sequential(*stack_b)
        self.decode_B.apply(gaussian_weights_init)
        self.decode_A = nn.Sequential(*stack_a)
        self.decode_A.apply(gaussian_weights_init)

    def forward(self, x_aa, x_bb):
        """Decode ``x_aa`` with decoder A and ``x_bb`` with decoder B.

        Returns the pair ``(decode_A(x_aa), decode_B(x_bb))``.
        """
        decoded_a = self.decode_A(x_aa)
        decoded_b = self.decode_B(x_bb)
        return decoded_a, decoded_b
200 |
201 |
202 |
--------------------------------------------------------------------------------
/utils/visualize_helper.py:
--------------------------------------------------------------------------------
1 | #encoding:utf8
2 |
3 | from utils import bbox_helper
4 | try:
5 | from graphviz import Digraph
6 | except Exception as e:
7 | print(e)
8 | import torch
9 | import numpy as np
10 | import cv2
11 | import os
12 |
# Category names indexed by class id; index 0 is reserved for background.
# (The 20 object names appear to be the PASCAL VOC categories.)
classes = [
    '__background__',
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]
21 |
def draw_bbox(img, bbox, color = (255,0,0)):
    """Draw one rectangle on ``img`` and return the result of ``cv2.rectangle``.

    ``bbox`` is converted to int32; its first two values are used as one
    corner and the next two as the opposite corner.
    """
    corners = np.array(bbox).astype(np.int32)
    first_corner = tuple(corners[0:2])
    opposite_corner = tuple(corners[2:4])
    return cv2.rectangle(img, first_corner, opposite_corner, color)
25 |
def draw_keypoint(img, keypoints, color = (255,0,0)):
    """Draw every keypoint as a small filled-looking circle on ``img``.

    ``keypoints`` is reshaped to (-1, 2) and cast to int32. Odd-indexed points
    are drawn with ``color`` and even-indexed points with the channel-reversed
    colour (per the original notes: odd = left parts, even = right parts).
    Returns ``img``.
    """
    points = keypoints.reshape(-1, 2).astype(np.int32)
    for idx, point in enumerate(points):
        dot_color = color if idx & 1 else color[::-1]
        cv2.circle(img, tuple(point), 2, dot_color, thickness=2)
    return img
def draw_mask(img, mask, thresh = 0.5):
    """Blend a thresholded mask into ``img`` (50/50) in place and return ``img``.

    Pixels where ``mask > thresh`` contribute an intensity of 250, all others
    contribute 0; the result is ``0.5 * img + 0.5 * overlay``.

    Args:
        img: image array, modified in place. May be 2-D, or 3-D with a
            trailing channel axis; any numeric dtype (the blend is computed in
            floating point and cast back to ``img.dtype``).
        mask: array whose first two dimensions match those of ``img``. A 2-D
            mask is broadcast over the channels of a 3-D image.
        thresh: binarisation threshold applied to ``mask``.

    Raises:
        AssertionError: if the spatial sizes of ``img`` and ``mask`` differ.
    """
    assert img.shape[:2] == mask.shape[:2], \
        'img.shape:{} vs mask.shape:{}'.format(img.shape, mask.shape)
    overlay = (mask > thresh).astype(np.uint8) * 250
    if img.ndim > overlay.ndim:
        # Let a single-channel mask tint every colour channel.
        overlay = overlay[..., np.newaxis]
    # Compute out of place: an in-place `img *= 0.5` is rejected by NumPy for
    # integer images. The maximum (255 * 0.5 + 250 * 0.5) still fits in uint8.
    blended = img * 0.5 + overlay * 0.5
    img[...] = blended.astype(img.dtype)
    return img
40 |
41 |
def _blend_half(region, overlay):
    """Write ``0.5 * region + 0.5 * overlay`` back into ``region`` in place.

    The blend is computed in floating point and cast to ``region.dtype``;
    integer images are clipped to their valid range first so bright overlays
    cannot wrap around.
    """
    blended = region * 0.5 + overlay * 0.5
    if np.issubdtype(region.dtype, np.integer):
        limits = np.iinfo(region.dtype)
        blended = np.clip(blended, limits.min, limits.max)
    region[...] = blended.astype(region.dtype)


def vis_results(results_dir,image_info, bboxes, keypoints, masks, heatmap, class_names):
    """Write one annotated image per confident detection into ``results_dir``.

    For every image in the batch and every detection whose score is at least
    0.9, a fresh copy of the image (from the project ``debugger``) gets the
    box and a 'category/score' caption drawn on it. When keypoints are given,
    ``*_keypoints.jpg`` and (if a heatmap is available) ``*_heatmap.jpg`` are
    written; when masks are given, ``*_mask.jpg`` is written.

    Args:
        results_dir: output directory, created if missing.
        image_info: per-image sequence; only its length (batch size) is used.
        bboxes: 2-D array, one row per detection: column 0 is the batch
            index, columns 1..4 are x1, y1, x2, y2, and the last two columns
            are score and class id.
        keypoints: array indexed as ``keypoints[det, kpt, :2]``, or None.
        masks: sequence of per-detection masks, or None.
        heatmap: array indexed as ``heatmap[det]`` with the keypoint channel
            first, or None.
        class_names: sequence mapping class id to a display name.

    NOTE(review): the heatmap overlay is only produced when keypoints are
    present -- confirm this matches the intended behaviour.
    """
    from utils.debug_helper import debugger
    import logging
    logger = logging.getLogger('global')
    batch_size = len(image_info)
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Explicit presence checks: `if array:` raises for multi-element arrays.
    has_keypoints = keypoints is not None and len(keypoints) > 0
    has_masks = masks is not None and len(masks) > 0
    has_heatmap = heatmap is not None

    for b_ix in range(batch_size):
        keep_ix = np.where(bboxes[:, 0] == b_ix)[0]
        bbs = bboxes[keep_ix]
        kps = keypoints[keep_ix, :, :2] if has_keypoints else None
        msks = [masks[ix] for ix in keep_ix] if has_masks else None
        hmap = heatmap[keep_ix] if has_heatmap else None

        filename = debugger.get_filename(b_ix).split('/')[-1].split('.')[0]
        for r_ix, B in enumerate(bbs):
            box_score, class_id = B[-2:]
            if box_score < 0.9:
                continue

            image = debugger.get_image(b_ix).copy()
            x1, y1, x2, y2 = map(int, B[1:1+4])
            r_h = y2 - y1
            r_w = x2 - x1
            draw_bbox(image, B[1:1+4])
            category_name = class_names[int(class_id)]
            cv2.putText(image, 'category:{0}, score:{1}'.format(category_name,box_score), (100, 100), 2, 1, (0, 0, 255))
            logger.info('{0}/{1}_{2}.jpg'.format(results_dir, filename, r_ix))

            if kps is not None:
                draw_keypoint(image, kps[r_ix])
                cv2.imwrite('{0}/{1}_{2}_keypoints.jpg'.format(results_dir, filename, r_ix), image)
                # A degenerate box cannot be a cv2.resize target; skip overlay.
                if hmap is not None and r_w > 0 and r_h > 0:
                    # Collapse the per-keypoint maps into one, scale to the box.
                    hp = cv2.resize(np.max(hmap[r_ix], axis=0), (r_w, r_h)) * 100
                    hp[hp < 0] = 0
                    _blend_half(image[y1:y2, x1:x2, ...], hp[..., np.newaxis])
                    cv2.imwrite('{0}/{1}_{2}_heatmap.jpg'.format(results_dir, filename, r_ix), image)
            if msks is not None:
                draw_mask(image, msks[r_ix])
                cv2.imwrite('{0}/{1}_{2}_mask.jpg'.format(results_dir, filename, r_ix), image)
91 |
def vis_detections(img, bboxes, gts, img_name, score_thresh):
    """Save one annotated copy of ``img`` per detection scoring >= ``score_thresh``.

    Each row of ``bboxes`` is read as (x1, y1, x2, y2, score, class id). The
    caption shows the class name from the module-level ``classes`` list, the
    best IoU of the box against ``gts`` and the score. Files are written to
    the relative directory ``visualize`` as ``<image stem>_<box index>.jpg``.
    """
    vis_dir = 'visualize'
    if not os.path.exists(vis_dir):
        os.makedirs(vis_dir)

    # Keep only the file stem: drop the directories and everything from the
    # first dot onwards.
    stem = img_name.rsplit('/',1)[-1].split('.')[0]

    best_iou = bbox_helper.bbox_iou_overlaps(bboxes, gts).max(axis=1)

    for box_ix in range(bboxes.shape[0]):
        detection = bboxes[box_ix]
        corners = detection[:4].astype(np.int32)
        score = detection[4]
        if score < score_thresh:
            continue
        label = classes[int(detection[5])]
        canvas = img.copy()
        caption = 'label:%s, iou:%.3f, score:%.3f' % (label, best_iou[box_ix], score)
        cv2.putText(canvas, caption, (30, 30), 2, 0.8, (0, 0, 255))
        annotated = cv2.rectangle(canvas, tuple(corners[0:2]), tuple(corners[2:4]), (255, 0, 0))
        cv2.imwrite('%s/%s_%d.jpg' %(vis_dir, stem, box_ix), annotated)
111 |
def vis_batch(input, output_dir, prefix):
    """Dump every ground-truth box of a batch as its own annotated image.

    ``input`` is indexed positionally: [0] images (stored through the project
    ``debugger`` when it is a tensor), [1] image info, [2] ground-truth boxes,
    [3] ignore regions, [4] keypoints, [5] masks. Only boxes and keypoints are
    drawn. One file ``<prefix>_<batch index>_<box index>.jpg`` is written to
    ``output_dir`` (created if missing) per box.
    """
    from utils.debug_helper import debugger
    import logging
    logger = logging.getLogger('global')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    images = input[0]
    if torch.is_tensor(images):
        debugger.store_tensor_as_image(images)

    # All six slots are read even though only boxes and keypoints are used,
    # so a too-short batch still fails here rather than later.
    image_info = input[1]
    gt_boxes = input[2]
    ignores = input[3]
    kpts = input[4]
    masks = input[5]

    for b in range(gt_boxes.shape[0]):
        base_image = debugger.get_image(b)
        boxes = gt_boxes[b]
        points = kpts[b]
        for ix in range(boxes.shape[0]):
            canvas = base_image.copy()
            draw_bbox(canvas, boxes[ix])
            draw_keypoint(canvas, points[ix])
            out_path = os.path.join(output_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix))
            cv2.imwrite(out_path, canvas)
143 | #for ix in range(igs.shape[0]):
144 | # img_cpy = imgs[b].copy()
145 | # draw_bbox(img_cpy, igs[ix], color=(0,0,255))
146 | # filename = os.path.join(test_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix + n))
147 | # cv2.imwrite(filename, img_cpy)
148 |
def make_dot(var, params=None):
    """Build a Graphviz ``Digraph`` of the autograd graph that produced ``var``.

    Tensors saved for backward are drawn in orange, leaf nodes exposing a
    ``variable`` attribute in light blue (labelled with their name and size),
    and every other autograd function with its type name.

    Args:
        var: output whose ``grad_fn`` is the root of the traversal.
        params: optional dict of (name, Variable); used to label the
            light-blue nodes. When given, every such node's variable must be
            one of its values.

    Returns:
        The populated ``Digraph`` (SVG format).
    """
    names_by_id = None
    if params is not None:
        names_by_id = {id(v): k for k, v in params.items()}

    dot = Digraph(
        node_attr={
            'style': 'filled',
            'shape': 'box',
            'align': 'left',
            'fontsize': '12',
            'ranksep': '0.1',
            'height': '0.2',
        },
        graph_attr={'size': "20,20"},
        format='svg',
    )
    visited = set()

    def size_to_str(size):
        return '(' + ', '.join('%d' % dim for dim in size) + ')'

    def add_nodes(node):
        # Every node is emitted once, however many edges lead to it.
        if node in visited:
            return
        node_id = str(id(node))
        if torch.is_tensor(node):
            dot.node(node_id, size_to_str(node.size()), fillcolor='orange')
        elif hasattr(node, 'variable'):
            leaf = node.variable
            label = names_by_id[id(leaf)] if names_by_id is not None else ''
            dot.node(node_id, '%s\n %s' % (label, size_to_str(leaf.size())), fillcolor='lightblue')
        else:
            dot.node(node_id, str(type(node).__name__))
        visited.add(node)

        if hasattr(node, 'next_functions'):
            for entry in node.next_functions:
                parent = entry[0]
                if parent is not None:
                    dot.edge(str(id(parent)), node_id)
                    add_nodes(parent)
        if hasattr(node, 'saved_tensors'):
            for saved in node.saved_tensors:
                dot.edge(str(id(saved)), node_id)
                add_nodes(saved)

    add_nodes(var.grad_fn)
    return dot
199 |
def visualize(var, filename):
    """Render the autograd graph of ``var`` to disk and return the graph.

    The graph is built with ``make_dot(var)`` and rendered through
    ``Digraph.render(filename)``, which saves the DOT source under
    ``filename`` and the rendered output next to it in the graph's format.

    Args:
        var: output whose autograd graph should be drawn.
        filename: path passed to ``Digraph.render``.

    Returns:
        The ``Digraph`` produced by ``make_dot``.
    """
    dot = make_dot(var)
    dot.render(filename)
    return dot
202 |
--------------------------------------------------------------------------------
| | | | |